xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision e27462030b33abe6a0fb4d726cfc6dd1cd15d70f)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/isimpl.h>
6 #include <petscblaslapack.h>
7 #include <petscsf.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, and MATAIJCRL; the type also automatically
22    switches over to using inodes when enough of them exist.
23 
24   Level: beginner
25 
26 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
27 M*/
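/*
   A minimal usage sketch for MATAIJ (illustrative only, not part of the implementation;
   it assumes PetscInitialize() has already been called and uses placeholder sizes and
   preallocation counts).  Calling both preallocation routines, as recommended above,
   keeps the code correct whether the communicator has one process or many.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);                           (or -mat_type aij with MatSetFromOptions())
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);            (used on a single-process communicator)
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);     (used on a multi-process communicator)
     ... MatSetValues() ...
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/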
28 
29 /*MC
30    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
31 
32    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
33    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
34    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
35   for communicators controlling multiple processes.  It is recommended that you call both of
36   the above preallocation routines for simplicity.
37 
38    Options Database Keys:
39 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
40 
41   Level: beginner
42 
43 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
44 M*/
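/*
   A runtime-selection sketch for MATAIJCRL (illustrative only; sizes and preallocation
   counts are placeholders).  Here the type is taken from the options database, so the
   same code runs with the plain AIJ type unless -mat_type aijcrl is given.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);                           (run with -mat_type aijcrl)
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/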
45 
46 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
47 {
48   PetscErrorCode ierr;
49   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
50 
51   PetscFunctionBegin;
52   if (mat->A) {
53     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
54     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
55   }
56   PetscFunctionReturn(0);
57 }
58 
59 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
60 {
61   PetscErrorCode  ierr;
62   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
63   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
64   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
65   const PetscInt  *ia,*ib;
66   const MatScalar *aa,*bb;
67   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
68   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
69 
70   PetscFunctionBegin;
71   *keptrows = 0;
72   ia        = a->i;
73   ib        = b->i;
74   for (i=0; i<m; i++) {
75     na = ia[i+1] - ia[i];
76     nb = ib[i+1] - ib[i];
77     if (!na && !nb) {
78       cnt++;
79       goto ok1;
80     }
81     aa = a->a + ia[i];
82     for (j=0; j<na; j++) {
83       if (aa[j] != 0.0) goto ok1;
84     }
85     bb = b->a + ib[i];
86     for (j=0; j <nb; j++) {
87       if (bb[j] != 0.0) goto ok1;
88     }
89     cnt++;
90 ok1:;
91   }
92   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
93   if (!n0rows) PetscFunctionReturn(0);
94   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
95   cnt  = 0;
96   for (i=0; i<m; i++) {
97     na = ia[i+1] - ia[i];
98     nb = ib[i+1] - ib[i];
99     if (!na && !nb) continue;
100     aa = a->a + ia[i];
101     for (j=0; j<na;j++) {
102       if (aa[j] != 0.0) {
103         rows[cnt++] = rstart + i;
104         goto ok2;
105       }
106     }
107     bb = b->a + ib[i];
108     for (j=0; j<nb; j++) {
109       if (bb[j] != 0.0) {
110         rows[cnt++] = rstart + i;
111         goto ok2;
112       }
113     }
114 ok2:;
115   }
116   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
117   PetscFunctionReturn(0);
118 }
119 
120 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
121 {
122   PetscErrorCode    ierr;
123   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
124 
125   PetscFunctionBegin;
126   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
127     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
128   } else {
129     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
130   }
131   PetscFunctionReturn(0);
132 }
133 
134 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
135 {
136   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
137   PetscErrorCode ierr;
138   PetscInt       i,rstart,nrows,*rows;
139 
140   PetscFunctionBegin;
141   *zrows = NULL;
142   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
143   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
144   for (i=0; i<nrows; i++) rows[i] += rstart;
145   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
146   PetscFunctionReturn(0);
147 }
148 
149 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
150 {
151   PetscErrorCode ierr;
152   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
153   PetscInt       i,n,*garray = aij->garray;
154   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
155   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
156   PetscReal      *work;
157 
158   PetscFunctionBegin;
159   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
160   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
161   if (type == NORM_2) {
162     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
163       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
164     }
165     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
166       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
167     }
168   } else if (type == NORM_1) {
169     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
170       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
171     }
172     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
173       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
174     }
175   } else if (type == NORM_INFINITY) {
176     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
177       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
178     }
179     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
180       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
181     }
182 
183   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
184   if (type == NORM_INFINITY) {
185     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
186   } else {
187     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
188   }
189   ierr = PetscFree(work);CHKERRQ(ierr);
190   if (type == NORM_2) {
191     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
192   }
193   PetscFunctionReturn(0);
194 }
195 
196 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
197 {
198   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
199   IS              sis,gis;
200   PetscErrorCode  ierr;
201   const PetscInt  *isis,*igis;
202   PetscInt        n,*iis,nsis,ngis,rstart,i;
203 
204   PetscFunctionBegin;
205   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
206   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
207   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
208   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
209   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
210   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
211 
212   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
213   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
214   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
215   n    = ngis + nsis;
216   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
217   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
218   for (i=0; i<n; i++) iis[i] += rstart;
219   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
220 
221   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
222   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
223   ierr = ISDestroy(&sis);CHKERRQ(ierr);
224   ierr = ISDestroy(&gis);CHKERRQ(ierr);
225   PetscFunctionReturn(0);
226 }
227 
228 /*
229     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
230     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
231 
232     Only for square matrices
233 
234     Used by a preconditioner, hence PETSC_EXTERN
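
    A calling sketch (illustrative only): rank 0 supplies the global square MATSEQAIJ
    matrix gmat and every rank passes its local row count m; whether the other ranks may
    pass NULL for gmat is an assumption of this sketch, not something this routine documents.

      Mat dist;
      ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
      ... later, to move fresh numerical values (same nonzero pattern) from rank 0 ...
      ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);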
235 */
236 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
237 {
238   PetscMPIInt    rank,size;
239   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
240   PetscErrorCode ierr;
241   Mat            mat;
242   Mat_SeqAIJ     *gmata;
243   PetscMPIInt    tag;
244   MPI_Status     status;
245   PetscBool      aij;
246   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
247 
248   PetscFunctionBegin;
249   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
250   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
251   if (!rank) {
252     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
253     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
254   }
255   if (reuse == MAT_INITIAL_MATRIX) {
256     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
257     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
258     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
259     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
260     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
261     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
262     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
263     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
264     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
265 
266     rowners[0] = 0;
267     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
268     rstart = rowners[rank];
269     rend   = rowners[rank+1];
270     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
271     if (!rank) {
272       gmata = (Mat_SeqAIJ*) gmat->data;
273       /* send row lengths to all processors */
274       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
275       for (i=1; i<size; i++) {
276         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
277       }
278       /* determine the number of diagonal and off-diagonal entries in each row */
279       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
280       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
281       jj   = 0;
282       for (i=0; i<m; i++) {
283         for (j=0; j<dlens[i]; j++) {
284           if (gmata->j[jj] < rstart) ld[i]++;
285           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
286           jj++;
287         }
288       }
289       /* send column indices to other processes */
290       for (i=1; i<size; i++) {
291         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
292         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
293         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
294       }
295 
296       /* send numerical values to other processes */
297       for (i=1; i<size; i++) {
298         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
299         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
300       }
301       gmataa = gmata->a;
302       gmataj = gmata->j;
303 
304     } else {
305       /* receive row lengths */
306       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
307       /* receive column indices */
308       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
309       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
310       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
311       /* determine the number of diagonal and off-diagonal entries in each row */
312       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
313       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
314       jj   = 0;
315       for (i=0; i<m; i++) {
316         for (j=0; j<dlens[i]; j++) {
317           if (gmataj[jj] < rstart) ld[i]++;
318           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
319           jj++;
320         }
321       }
322       /* receive numerical values */
323       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
324       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
325     }
326     /* set preallocation */
327     for (i=0; i<m; i++) {
328       dlens[i] -= olens[i];
329     }
330     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
331     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
332 
333     for (i=0; i<m; i++) {
334       dlens[i] += olens[i];
335     }
336     cnt = 0;
337     for (i=0; i<m; i++) {
338       row  = rstart + i;
339       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
340       cnt += dlens[i];
341     }
342     if (rank) {
343       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
344     }
345     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
346     ierr = PetscFree(rowners);CHKERRQ(ierr);
347 
348     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
349 
350     *inmat = mat;
351   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
352     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
353     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
354     mat  = *inmat;
355     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
356     if (!rank) {
357       /* send numerical values to other processes */
358       gmata  = (Mat_SeqAIJ*) gmat->data;
359       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
360       gmataa = gmata->a;
361       for (i=1; i<size; i++) {
362         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
363         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
364       }
365       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
366     } else {
367       /* receive numerical values from process 0 */
368       nz   = Ad->nz + Ao->nz;
369       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
370       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
371     }
372     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
373     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
374     ad = Ad->a;
375     ao = Ao->a;
376     if (mat->rmap->n) {
377       i  = 0;
378       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
379       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
380     }
381     for (i=1; i<mat->rmap->n; i++) {
382       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
383       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
384     }
385     i--;
386     if (mat->rmap->n) {
387       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
388     }
389     if (rank) {
390       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
391     }
392   }
393   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
394   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
395   PetscFunctionReturn(0);
396 }
397 
398 /*
399   Local utility routine that creates a mapping from the global column
400   number to the local number in the off-diagonal part of the local
401   storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable at
402   a slightly higher hash table cost; without it, it is not scalable (each
403   process stores an order-N integer array) but lookups are fast.
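
  A lookup sketch (illustrative; it mirrors the existing uses in MatSetValues_MPIAIJ()
  and MatGetValues_MPIAIJ()); gcol is a global column index, lcol the resulting local
  column index into the off-diagonal block B:

    PetscInt lcol;
  #if defined(PETSC_USE_CTABLE)
    ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
    lcol--;                          (lcol < 0 means gcol does not appear in B)
  #else
    lcol = aij->colmap[gcol] - 1;    (likewise, lcol < 0 means gcol does not appear in B)
  #endif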
404 */
405 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
406 {
407   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
408   PetscErrorCode ierr;
409   PetscInt       n = aij->B->cmap->n,i;
410 
411   PetscFunctionBegin;
412   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
413 #if defined(PETSC_USE_CTABLE)
414   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
415   for (i=0; i<n; i++) {
416     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
417   }
418 #else
419   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
420   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
421   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
422 #endif
423   PetscFunctionReturn(0);
424 }
425 
426 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
427 { \
428     if (col <= lastcol1)  low1 = 0;     \
429     else                 high1 = nrow1; \
430     lastcol1 = col;\
431     while (high1-low1 > 5) { \
432       t = (low1+high1)/2; \
433       if (rp1[t] > col) high1 = t; \
434       else              low1  = t; \
435     } \
436       for (_i=low1; _i<high1; _i++) { \
437         if (rp1[_i] > col) break; \
438         if (rp1[_i] == col) { \
439           if (addv == ADD_VALUES) ap1[_i] += value;   \
440           else                    ap1[_i] = value; \
441           goto a_noinsert; \
442         } \
443       }  \
444       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
445       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
446       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
447       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
448       N = nrow1++ - 1; a->nz++; high1++; \
449       /* shift up all the later entries in this row */ \
450       for (ii=N; ii>=_i; ii--) { \
451         rp1[ii+1] = rp1[ii]; \
452         ap1[ii+1] = ap1[ii]; \
453       } \
454       rp1[_i] = col;  \
455       ap1[_i] = value;  \
456       A->nonzerostate++;\
457       a_noinsert: ; \
458       ailen[row] = nrow1; \
459 }
460 
461 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
462   { \
463     if (col <= lastcol2) low2 = 0;                        \
464     else high2 = nrow2;                                   \
465     lastcol2 = col;                                       \
466     while (high2-low2 > 5) {                              \
467       t = (low2+high2)/2;                                 \
468       if (rp2[t] > col) high2 = t;                        \
469       else             low2  = t;                         \
470     }                                                     \
471     for (_i=low2; _i<high2; _i++) {                       \
472       if (rp2[_i] > col) break;                           \
473       if (rp2[_i] == col) {                               \
474         if (addv == ADD_VALUES) ap2[_i] += value;         \
475         else                    ap2[_i] = value;          \
476         goto b_noinsert;                                  \
477       }                                                   \
478     }                                                     \
479     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
480     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
481     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
482     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
483     N = nrow2++ - 1; b->nz++; high2++;                    \
484     /* shift up all the later entries in this row */      \
485     for (ii=N; ii>=_i; ii--) {                            \
486       rp2[ii+1] = rp2[ii];                                \
487       ap2[ii+1] = ap2[ii];                                \
488     }                                                     \
489     rp2[_i] = col;                                        \
490     ap2[_i] = value;                                      \
491     B->nonzerostate++;                                    \
492     b_noinsert: ;                                         \
493     bilen[row] = nrow2;                                   \
494   }
495 
496 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
497 {
498   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
499   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
500   PetscErrorCode ierr;
501   PetscInt       l,*garray = mat->garray,diag;
502 
503   PetscFunctionBegin;
504   /* code only works for square matrices A */
505 
506   /* find size of row to the left of the diagonal part */
507   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
508   row  = row - diag;
509   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
510     if (garray[b->j[b->i[row]+l]] > diag) break;
511   }
512   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
513 
514   /* diagonal part */
515   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
516 
517   /* right of diagonal part */
518   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
519   PetscFunctionReturn(0);
520 }
521 
522 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
523 {
524   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
525   PetscScalar    value;
526   PetscErrorCode ierr;
527   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
528   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
529   PetscBool      roworiented = aij->roworiented;
530 
531   /* Some Variables required in the macro */
532   Mat        A                 = aij->A;
533   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
534   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
535   MatScalar  *aa               = a->a;
536   PetscBool  ignorezeroentries = a->ignorezeroentries;
537   Mat        B                 = aij->B;
538   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
539   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
540   MatScalar  *ba               = b->a;
541 
542   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
543   PetscInt  nonew;
544   MatScalar *ap1,*ap2;
545 
546   PetscFunctionBegin;
547   for (i=0; i<m; i++) {
548     if (im[i] < 0) continue;
549 #if defined(PETSC_USE_DEBUG)
550     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
551 #endif
552     if (im[i] >= rstart && im[i] < rend) {
553       row      = im[i] - rstart;
554       lastcol1 = -1;
555       rp1      = aj + ai[row];
556       ap1      = aa + ai[row];
557       rmax1    = aimax[row];
558       nrow1    = ailen[row];
559       low1     = 0;
560       high1    = nrow1;
561       lastcol2 = -1;
562       rp2      = bj + bi[row];
563       ap2      = ba + bi[row];
564       rmax2    = bimax[row];
565       nrow2    = bilen[row];
566       low2     = 0;
567       high2    = nrow2;
568 
569       for (j=0; j<n; j++) {
570         if (roworiented) value = v[i*n+j];
571         else             value = v[i+j*m];
572         if (in[j] >= cstart && in[j] < cend) {
573           col   = in[j] - cstart;
574           nonew = a->nonew;
575           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
576           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
577         } else if (in[j] < 0) continue;
578 #if defined(PETSC_USE_DEBUG)
579         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
580 #endif
581         else {
582           if (mat->was_assembled) {
583             if (!aij->colmap) {
584               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
585             }
586 #if defined(PETSC_USE_CTABLE)
587             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
588             col--;
589 #else
590             col = aij->colmap[in[j]] - 1;
591 #endif
592             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
593               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
594               col  =  in[j];
595               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
596               B     = aij->B;
597               b     = (Mat_SeqAIJ*)B->data;
598               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
599               rp2   = bj + bi[row];
600               ap2   = ba + bi[row];
601               rmax2 = bimax[row];
602               nrow2 = bilen[row];
603               low2  = 0;
604               high2 = nrow2;
605               bm    = aij->B->rmap->n;
606               ba    = b->a;
607             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
608           } else col = in[j];
609           nonew = b->nonew;
610           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
611         }
612       }
613     } else {
614       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
615       if (!aij->donotstash) {
616         mat->assembled = PETSC_FALSE;
617         if (roworiented) {
618           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
619         } else {
620           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
621         }
622       }
623     }
624   }
625   PetscFunctionReturn(0);
626 }
627 
628 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
629 {
630   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
631   PetscErrorCode ierr;
632   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
633   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
634 
635   PetscFunctionBegin;
636   for (i=0; i<m; i++) {
637     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
638     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
639     if (idxm[i] >= rstart && idxm[i] < rend) {
640       row = idxm[i] - rstart;
641       for (j=0; j<n; j++) {
642         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
643         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
644         if (idxn[j] >= cstart && idxn[j] < cend) {
645           col  = idxn[j] - cstart;
646           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
647         } else {
648           if (!aij->colmap) {
649             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
650           }
651 #if defined(PETSC_USE_CTABLE)
652           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
653           col--;
654 #else
655           col = aij->colmap[idxn[j]] - 1;
656 #endif
657           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
658           else {
659             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
660           }
661         }
662       }
663     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
664   }
665   PetscFunctionReturn(0);
666 }
667 
668 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
669 
670 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
671 {
672   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
673   PetscErrorCode ierr;
674   PetscInt       nstash,reallocs;
675 
676   PetscFunctionBegin;
677   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
678 
679   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
680   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
681   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
682   PetscFunctionReturn(0);
683 }
684 
685 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
686 {
687   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
688   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
689   PetscErrorCode ierr;
690   PetscMPIInt    n;
691   PetscInt       i,j,rstart,ncols,flg;
692   PetscInt       *row,*col;
693   PetscBool      other_disassembled;
694   PetscScalar    *val;
695 
696   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
697 
698   PetscFunctionBegin;
699   if (!aij->donotstash && !mat->nooffprocentries) {
700     while (1) {
701       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
702       if (!flg) break;
703 
704       for (i=0; i<n; ) {
705         /* Now identify the consecutive vals belonging to the same row */
706         for (j=i,rstart=row[j]; j<n; j++) {
707           if (row[j] != rstart) break;
708         }
709         if (j < n) ncols = j-i;
710         else       ncols = n-i;
711         /* Now assemble all these values with a single function call */
712         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
713 
714         i = j;
715       }
716     }
717     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
718   }
719   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
720   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
721 
722   /* determine if any process has disassembled; if so we must
723      also disassemble ourselves so that we can reassemble. */
724   /*
725      if the nonzero structure of the submatrix B cannot change then we know that
726      no process disassembled, and we can skip this step
727   */
728   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
729     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
730     if (mat->was_assembled && !other_disassembled) {
731       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
732     }
733   }
734   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
735     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
736   }
737   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
738   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
739   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
740 
741   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
742 
743   aij->rowvalues = 0;
744 
745   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
746   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
747 
748   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
749   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
750     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
751     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
752   }
753   PetscFunctionReturn(0);
754 }
755 
756 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
757 {
758   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
759   PetscErrorCode ierr;
760 
761   PetscFunctionBegin;
762   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
763   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
764   PetscFunctionReturn(0);
765 }
766 
767 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
768 {
769   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
770   PetscInt      *lrows;
771   PetscInt       r, len;
772   PetscErrorCode ierr;
773 
774   PetscFunctionBegin;
775   /* get locally owned rows */
776   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
777   /* fix right hand side if needed */
778   if (x && b) {
779     const PetscScalar *xx;
780     PetscScalar       *bb;
781 
782     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
783     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
784     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
785     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
786     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
787   }
788   /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
789   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
790   if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */
791     PetscBool cong;
792     ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
793     if (cong) A->congruentlayouts = 1;
794     else      A->congruentlayouts = 0;
795   }
796   if ((diag != 0.0) && A->congruentlayouts) {
797     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
798   } else if (diag != 0.0) {
799     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
800     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
801     for (r = 0; r < len; ++r) {
802       const PetscInt row = lrows[r] + A->rmap->rstart;
803       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
804     }
805     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
806     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
807   } else {
808     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
809   }
810   ierr = PetscFree(lrows);CHKERRQ(ierr);
811 
812   /* only change matrix nonzero state if pattern was allowed to be changed */
813   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
814     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
815     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
816   }
817   PetscFunctionReturn(0);
818 }
819 
820 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
821 {
822   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
823   PetscErrorCode    ierr;
824   PetscMPIInt       n = A->rmap->n;
825   PetscInt          i,j,r,m,p = 0,len = 0;
826   PetscInt          *lrows,*owners = A->rmap->range;
827   PetscSFNode       *rrows;
828   PetscSF           sf;
829   const PetscScalar *xx;
830   PetscScalar       *bb,*mask;
831   Vec               xmask,lmask;
832   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
833   const PetscInt    *aj, *ii,*ridx;
834   PetscScalar       *aa;
835 
836   PetscFunctionBegin;
837   /* Create SF where leaves are input rows and roots are owned rows */
838   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
839   for (r = 0; r < n; ++r) lrows[r] = -1;
840   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
841   for (r = 0; r < N; ++r) {
842     const PetscInt idx   = rows[r];
843     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
844     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
845       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
846     }
847     rrows[r].rank  = p;
848     rrows[r].index = rows[r] - owners[p];
849   }
850   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
851   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
852   /* Collect flags for rows to be zeroed */
853   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
854   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
855   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
856   /* Compress and put in row numbers */
857   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
858   /* zero diagonal part of matrix */
859   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
860   /* handle off diagonal part of matrix */
861   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
862   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
863   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
864   for (i=0; i<len; i++) bb[lrows[i]] = 1;
865   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
866   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
867   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
868   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
869   if (x) {
870     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
871     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
872     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
873     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
874   }
875   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
876   /* remove zeroed rows of off diagonal matrix */
877   ii = aij->i;
878   for (i=0; i<len; i++) {
879     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
880   }
881   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
882   if (aij->compressedrow.use) {
883     m    = aij->compressedrow.nrows;
884     ii   = aij->compressedrow.i;
885     ridx = aij->compressedrow.rindex;
886     for (i=0; i<m; i++) {
887       n  = ii[i+1] - ii[i];
888       aj = aij->j + ii[i];
889       aa = aij->a + ii[i];
890 
891       for (j=0; j<n; j++) {
892         if (PetscAbsScalar(mask[*aj])) {
893           if (b) bb[*ridx] -= *aa*xx[*aj];
894           *aa = 0.0;
895         }
896         aa++;
897         aj++;
898       }
899       ridx++;
900     }
901   } else { /* do not use compressed row format */
902     m = l->B->rmap->n;
903     for (i=0; i<m; i++) {
904       n  = ii[i+1] - ii[i];
905       aj = aij->j + ii[i];
906       aa = aij->a + ii[i];
907       for (j=0; j<n; j++) {
908         if (PetscAbsScalar(mask[*aj])) {
909           if (b) bb[i] -= *aa*xx[*aj];
910           *aa = 0.0;
911         }
912         aa++;
913         aj++;
914       }
915     }
916   }
917   if (x) {
918     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
919     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
920   }
921   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
922   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
923   ierr = PetscFree(lrows);CHKERRQ(ierr);
924 
925   /* only change matrix nonzero state if pattern was allowed to be changed */
926   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
927     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
928     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
929   }
930   PetscFunctionReturn(0);
931 }
932 
933 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
934 {
935   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
936   PetscErrorCode ierr;
937   PetscInt       nt;
938   VecScatter     Mvctx = a->Mvctx;
939 
940   PetscFunctionBegin;
941   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
942   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
943 
944   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
945   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
946   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
947   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
948   PetscFunctionReturn(0);
949 }
950 
951 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
952 {
953   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
954   PetscErrorCode ierr;
955 
956   PetscFunctionBegin;
957   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
958   PetscFunctionReturn(0);
959 }
960 
961 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
962 {
963   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
964   PetscErrorCode ierr;
965 
966   PetscFunctionBegin;
967   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
968   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
969   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
970   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
971   PetscFunctionReturn(0);
972 }
973 
974 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
975 {
976   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
977   PetscErrorCode ierr;
978   PetscBool      merged;
979 
980   PetscFunctionBegin;
981   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
982   /* do nondiagonal part */
983   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
984   if (!merged) {
985     /* send it on its way */
986     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
987     /* do local part */
988     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
989     /* receive remote parts: note this assumes the values are not actually */
990     /* added into yy until the next line */
991     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
992   } else {
993     /* do local part */
994     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
995     /* send it on its way */
996     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
997     /* values actually were received in the Begin() but we need to call this nop */
998     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
999   }
1000   PetscFunctionReturn(0);
1001 }
1002 
1003 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1004 {
1005   MPI_Comm       comm;
1006   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1007   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1008   IS             Me,Notme;
1009   PetscErrorCode ierr;
1010   PetscInt       M,N,first,last,*notme,i;
1011   PetscMPIInt    size;
1012 
1013   PetscFunctionBegin;
1014   /* Easy test: the diagonal blocks must be transposes of each other */
1015   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1016   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1017   if (!*f) PetscFunctionReturn(0);
1018   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1019   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1020   if (size == 1) PetscFunctionReturn(0);
1021 
1022   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1023   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1024   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1025   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1026   for (i=0; i<first; i++) notme[i] = i;
1027   for (i=last; i<M; i++) notme[i-last+first] = i;
1028   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1029   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1030   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1031   Aoff = Aoffs[0];
1032   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1033   Boff = Boffs[0];
1034   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1035   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1036   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1037   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1038   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1039   ierr = PetscFree(notme);CHKERRQ(ierr);
1040   PetscFunctionReturn(0);
1041 }
1042 
1043 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1044 {
1045   PetscErrorCode ierr;
1046 
1047   PetscFunctionBegin;
1048   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1049   PetscFunctionReturn(0);
1050 }
1051 
1052 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1053 {
1054   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1055   PetscErrorCode ierr;
1056 
1057   PetscFunctionBegin;
1058   /* do nondiagonal part */
1059   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1060   /* send it on its way */
1061   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1062   /* do local part */
1063   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1064   /* receive remote parts */
1065   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1066   PetscFunctionReturn(0);
1067 }
1068 
1069 /*
1070   This only works correctly for square matrices where the subblock A->A is the
1071    diagonal block
1072 */
1073 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1074 {
1075   PetscErrorCode ierr;
1076   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1077 
1078   PetscFunctionBegin;
1079   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1080   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1081   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1082   PetscFunctionReturn(0);
1083 }
1084 
1085 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1086 {
1087   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1088   PetscErrorCode ierr;
1089 
1090   PetscFunctionBegin;
1091   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1092   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1097 {
1098   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1099   PetscErrorCode ierr;
1100 
1101   PetscFunctionBegin;
1102 #if defined(PETSC_USE_LOG)
1103   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1104 #endif
1105   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1106   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1107   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1108   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1109 #if defined(PETSC_USE_CTABLE)
1110   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1111 #else
1112   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1113 #endif
1114   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1115   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1116   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1117   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1118   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1119   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1120   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1121 
1122   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1123   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1124   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1125   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1126   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1127   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1128   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1129   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1130   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1131 #if defined(PETSC_HAVE_ELEMENTAL)
1132   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1133 #endif
1134 #if defined(PETSC_HAVE_HYPRE)
1135   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1136   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1137 #endif
1138   PetscFunctionReturn(0);
1139 }
1140 
1141 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1142 {
1143   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1144   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1145   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1146   PetscErrorCode ierr;
1147   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1148   int            fd;
1149   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1150   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1151   PetscScalar    *column_values;
1152   PetscInt       message_count,flowcontrolcount;
1153   FILE           *file;
1154 
1155   PetscFunctionBegin;
1156   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1157   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1158   nz   = A->nz + B->nz;
1159   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1160   if (!rank) {
1161     header[0] = MAT_FILE_CLASSID;
1162     header[1] = mat->rmap->N;
1163     header[2] = mat->cmap->N;
1164 
1165     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1166     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1167     /* get largest number of rows any processor has */
1168     rlen  = mat->rmap->n;
1169     range = mat->rmap->range;
1170     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1171   } else {
1172     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1173     rlen = mat->rmap->n;
1174   }
1175 
1176   /* load up the local row counts */
1177   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1178   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1179 
1180   /* store the row lengths to the file */
1181   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1182   if (!rank) {
1183     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1184     for (i=1; i<size; i++) {
1185       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1186       rlen = range[i+1] - range[i];
1187       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1188       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1189     }
1190     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1191   } else {
1192     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1193     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1194     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1195   }
1196   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1197 
1198   /* load up the local column indices */
1199   nzmax = nz; /* process 0 needs as much space as the largest amount any process needs */
1200   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1201   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1202   cnt   = 0;
1203   for (i=0; i<mat->rmap->n; i++) {
1204     for (j=B->i[i]; j<B->i[i+1]; j++) {
1205       if ((col = garray[B->j[j]]) > cstart) break;
1206       column_indices[cnt++] = col;
1207     }
1208     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1209     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1210   }
1211   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1212 
1213   /* store the column indices to the file */
1214   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1215   if (!rank) {
1216     MPI_Status status;
1217     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1218     for (i=1; i<size; i++) {
1219       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1220       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1221       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1222       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1223       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1224     }
1225     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1226   } else {
1227     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1228     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1229     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1230     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1231   }
1232   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1233 
1234   /* load up the local column values */
1235   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1236   cnt  = 0;
1237   for (i=0; i<mat->rmap->n; i++) {
1238     for (j=B->i[i]; j<B->i[i+1]; j++) {
1239       if (garray[B->j[j]] > cstart) break;
1240       column_values[cnt++] = B->a[j];
1241     }
1242     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1243     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1244   }
1245   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1246 
1247   /* store the column values to the file */
1248   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1249   if (!rank) {
1250     MPI_Status status;
1251     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1252     for (i=1; i<size; i++) {
1253       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1254       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1255       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1256       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1257       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1258     }
1259     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1260   } else {
1261     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1262     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1263     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1264     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1265   }
1266   ierr = PetscFree(column_values);CHKERRQ(ierr);
1267 
1268   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1269   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1270   PetscFunctionReturn(0);
1271 }
1272 
1273 #include <petscdraw.h>
1274 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1275 {
1276   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1277   PetscErrorCode    ierr;
1278   PetscMPIInt       rank = aij->rank,size = aij->size;
1279   PetscBool         isdraw,iascii,isbinary;
1280   PetscViewer       sviewer;
1281   PetscViewerFormat format;
1282 
1283   PetscFunctionBegin;
1284   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1285   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1286   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1287   if (iascii) {
1288     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1289     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1290       MatInfo   info;
1291       PetscBool inodes;
1292 
1293       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1294       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1295       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1296       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1297       if (!inodes) {
1298         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1299                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1300       } else {
1301         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1302                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1303       }
1304       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1305       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1306       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1307       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1308       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1309       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1310       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1311       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1312       PetscFunctionReturn(0);
1313     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1314       PetscInt inodecount,inodelimit,*inodes;
1315       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1316       if (inodes) {
1317         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1318       } else {
1319         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1320       }
1321       PetscFunctionReturn(0);
1322     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1323       PetscFunctionReturn(0);
1324     }
1325   } else if (isbinary) {
1326     if (size == 1) {
1327       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1328       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1329     } else {
1330       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1331     }
1332     PetscFunctionReturn(0);
1333   } else if (isdraw) {
1334     PetscDraw draw;
1335     PetscBool isnull;
1336     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1337     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1338     if (isnull) PetscFunctionReturn(0);
1339   }
1340 
1341   {
1342     /* assemble the entire matrix onto the first processor */
1343     Mat        A;
1344     Mat_SeqAIJ *Aloc;
1345     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1346     MatScalar  *a;
1347 
1348     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1349     if (!rank) {
1350       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1351     } else {
1352       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1353     }
1354     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1355     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1356     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1357     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1358     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1359 
1360     /* copy over the A part */
1361     Aloc = (Mat_SeqAIJ*)aij->A->data;
1362     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1363     row  = mat->rmap->rstart;
1364     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1365     for (i=0; i<m; i++) {
1366       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1367       row++;
1368       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1369     }
1370     aj = Aloc->j;
1371     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1372 
1373     /* copy over the B part */
1374     Aloc = (Mat_SeqAIJ*)aij->B->data;
1375     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1376     row  = mat->rmap->rstart;
1377     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1378     ct   = cols;
1379     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1380     for (i=0; i<m; i++) {
1381       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1382       row++;
1383       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1384     }
1385     ierr = PetscFree(ct);CHKERRQ(ierr);
1386     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1387     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1388     /*
1389        Every process must participate in this call to draw the matrix, since the graphics waits are
1390        synchronized across all processes that share the PetscDraw object
1391     */
1392     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1393     if (!rank) {
1394       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1395       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1396     }
1397     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1398     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1399     ierr = MatDestroy(&A);CHKERRQ(ierr);
1400   }
1401   PetscFunctionReturn(0);
1402 }
1403 
1404 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1405 {
1406   PetscErrorCode ierr;
1407   PetscBool      iascii,isdraw,issocket,isbinary;
1408 
1409   PetscFunctionBegin;
1410   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1411   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1412   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1413   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1414   if (iascii || isdraw || isbinary || issocket) {
1415     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1416   }
1417   PetscFunctionReturn(0);
1418 }
1419 
1420 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1421 {
1422   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1423   PetscErrorCode ierr;
1424   Vec            bb1 = 0;
1425   PetscBool      hasop;
1426 
1427   PetscFunctionBegin;
1428   if (flag == SOR_APPLY_UPPER) {
1429     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1430     PetscFunctionReturn(0);
1431   }
1432 
1433   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1434     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1435   }
1436 
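  /*
     In the SOR_LOCAL_* cases below the parallel coupling is handled Jacobi-style: each outer
     iteration gathers the ghost values of xx into mat->lvec, moves the off-diagonal
     contribution to the right-hand side (bb1 = bb - B*lvec), and then runs an SOR sweep on
     the local diagonal block mat->A only.
  */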
1437   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1438     if (flag & SOR_ZERO_INITIAL_GUESS) {
1439       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1440       its--;
1441     }
1442 
1443     while (its--) {
1444       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1445       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1446 
1447       /* update rhs: bb1 = bb - B*x */
1448       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1449       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1450 
1451       /* local sweep */
1452       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1453     }
1454   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1455     if (flag & SOR_ZERO_INITIAL_GUESS) {
1456       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1457       its--;
1458     }
1459     while (its--) {
1460       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1461       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1462 
1463       /* update rhs: bb1 = bb - B*x */
1464       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1465       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1466 
1467       /* local sweep */
1468       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1469     }
1470   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1471     if (flag & SOR_ZERO_INITIAL_GUESS) {
1472       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1473       its--;
1474     }
1475     while (its--) {
1476       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1477       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1478 
1479       /* update rhs: bb1 = bb - B*x */
1480       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1481       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1482 
1483       /* local sweep */
1484       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1485     }
1486   } else if (flag & SOR_EISENSTAT) {
1487     Vec xx1;
1488 
1489     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1490     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1491 
1492     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1493     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1494     if (!mat->diag) {
1495       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1496       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1497     }
1498     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1499     if (hasop) {
1500       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1501     } else {
1502       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1503     }
1504     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1505 
1506     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1507 
1508     /* local sweep */
1509     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1510     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1511     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1512   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1513 
1514   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1515 
1516   matin->factorerrortype = mat->A->factorerrortype;
1517   PetscFunctionReturn(0);
1518 }
1519 
1520 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1521 {
1522   Mat            aA,aB,Aperm;
1523   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1524   PetscScalar    *aa,*ba;
1525   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1526   PetscSF        rowsf,sf;
1527   IS             parcolp = NULL;
1528   PetscBool      done;
1529   PetscErrorCode ierr;
1530 
1531   PetscFunctionBegin;
1532   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1533   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1534   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1535   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1536 
1537   /* Invert row permutation to find out where my rows should go */
1538   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1539   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1540   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1541   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1542   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1543   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1544 
1545   /* Invert column permutation to find out where my columns should go */
1546   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1547   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1548   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1549   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1550   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1551   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1552   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1553 
1554   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1555   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1556   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1557 
1558   /* Find out where my gcols should go */
1559   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1560   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1561   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1562   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1563   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1564   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1565   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1566   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1567 
1568   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1569   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1570   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1571   for (i=0; i<m; i++) {
1572     PetscInt row = rdest[i],rowner;
1573     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1574     for (j=ai[i]; j<ai[i+1]; j++) {
1575       PetscInt cowner,col = cdest[aj[j]];
1576       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1577       if (rowner == cowner) dnnz[i]++;
1578       else onnz[i]++;
1579     }
1580     for (j=bi[i]; j<bi[i+1]; j++) {
1581       PetscInt cowner,col = gcdest[bj[j]];
1582       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1583       if (rowner == cowner) dnnz[i]++;
1584       else onnz[i]++;
1585     }
1586   }
1587   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1588   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1589   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1590   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1591   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1592 
1593   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1594   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1595   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1596   for (i=0; i<m; i++) {
1597     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1598     PetscInt j0,rowlen;
1599     rowlen = ai[i+1] - ai[i];
1600     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than the scratch length m, so insert the values in batches */
1601       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1602       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1603     }
1604     rowlen = bi[i+1] - bi[i];
1605     for (j0=j=0; j<rowlen; j0=j) {
1606       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1607       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1608     }
1609   }
1610   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1611   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1612   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1613   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1614   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1615   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1616   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1617   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1618   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1619   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1620   *B = Aperm;
1621   PetscFunctionReturn(0);
1622 }
1623 
1624 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1625 {
1626   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1627   PetscErrorCode ierr;
1628 
1629   PetscFunctionBegin;
1630   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1631   if (ghosts) *ghosts = aij->garray;
1632   PetscFunctionReturn(0);
1633 }
1634 
1635 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1636 {
1637   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1638   Mat            A    = mat->A,B = mat->B;
1639   PetscErrorCode ierr;
1640   PetscReal      isend[5],irecv[5];
1641 
1642   PetscFunctionBegin;
1643   info->block_size = 1.0;
1644   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1645 
1646   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1647   isend[3] = info->memory;  isend[4] = info->mallocs;
1648 
1649   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1650 
1651   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1652   isend[3] += info->memory;  isend[4] += info->mallocs;
1653   if (flag == MAT_LOCAL) {
1654     info->nz_used      = isend[0];
1655     info->nz_allocated = isend[1];
1656     info->nz_unneeded  = isend[2];
1657     info->memory       = isend[3];
1658     info->mallocs      = isend[4];
1659   } else if (flag == MAT_GLOBAL_MAX) {
1660     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1661 
1662     info->nz_used      = irecv[0];
1663     info->nz_allocated = irecv[1];
1664     info->nz_unneeded  = irecv[2];
1665     info->memory       = irecv[3];
1666     info->mallocs      = irecv[4];
1667   } else if (flag == MAT_GLOBAL_SUM) {
1668     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1669 
1670     info->nz_used      = irecv[0];
1671     info->nz_allocated = irecv[1];
1672     info->nz_unneeded  = irecv[2];
1673     info->memory       = irecv[3];
1674     info->mallocs      = irecv[4];
1675   }
1676   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1677   info->fill_ratio_needed = 0;
1678   info->factor_mallocs    = 0;
1679   PetscFunctionReturn(0);
1680 }
1681 
1682 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1683 {
1684   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1685   PetscErrorCode ierr;
1686 
1687   PetscFunctionBegin;
1688   switch (op) {
1689   case MAT_NEW_NONZERO_LOCATIONS:
1690   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1691   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1692   case MAT_KEEP_NONZERO_PATTERN:
1693   case MAT_NEW_NONZERO_LOCATION_ERR:
1694   case MAT_USE_INODES:
1695   case MAT_IGNORE_ZERO_ENTRIES:
1696     MatCheckPreallocated(A,1);
1697     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1698     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1699     break;
1700   case MAT_ROW_ORIENTED:
1701     MatCheckPreallocated(A,1);
1702     a->roworiented = flg;
1703 
1704     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1705     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1706     break;
1707   case MAT_NEW_DIAGONALS:
1708     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1709     break;
1710   case MAT_IGNORE_OFF_PROC_ENTRIES:
1711     a->donotstash = flg;
1712     break;
1713   case MAT_SPD:
1714     A->spd_set = PETSC_TRUE;
1715     A->spd     = flg;
1716     if (flg) {
1717       A->symmetric                  = PETSC_TRUE;
1718       A->structurally_symmetric     = PETSC_TRUE;
1719       A->symmetric_set              = PETSC_TRUE;
1720       A->structurally_symmetric_set = PETSC_TRUE;
1721     }
1722     break;
1723   case MAT_SYMMETRIC:
1724     MatCheckPreallocated(A,1);
1725     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1726     break;
1727   case MAT_STRUCTURALLY_SYMMETRIC:
1728     MatCheckPreallocated(A,1);
1729     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1730     break;
1731   case MAT_HERMITIAN:
1732     MatCheckPreallocated(A,1);
1733     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1734     break;
1735   case MAT_SYMMETRY_ETERNAL:
1736     MatCheckPreallocated(A,1);
1737     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1738     break;
1739   case MAT_SUBMAT_SINGLEIS:
1740     A->submat_singleis = flg;
1741     break;
1742   case MAT_STRUCTURE_ONLY:
1743     /* The option is handled directly by MatSetOption() */
1744     break;
1745   default:
1746     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1747   }
1748   PetscFunctionReturn(0);
1749 }
1750 
1751 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1752 {
1753   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1754   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1755   PetscErrorCode ierr;
1756   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1757   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1758   PetscInt       *cmap,*idx_p;
1759 
1760   PetscFunctionBegin;
1761   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1762   mat->getrowactive = PETSC_TRUE;
1763 
1764   if (!mat->rowvalues && (idx || v)) {
1765     /*
1766         allocate enough space to hold information from the longest row.
1767     */
1768     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1769     PetscInt   max = 1,tmp;
1770     for (i=0; i<matin->rmap->n; i++) {
1771       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1772       if (max < tmp) max = tmp;
1773     }
1774     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1775   }
1776 
1777   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1778   lrow = row - rstart;
1779 
1780   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1781   if (!v)   {pvA = 0; pvB = 0;}
1782   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1783   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1784   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1785   nztot = nzA + nzB;
1786 
1787   cmap = mat->garray;
1788   if (v  || idx) {
1789     if (nztot) {
1790       /* Sort by increasing column numbers, assuming A and B already sorted */
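      /* Off-diagonal (B) columns with global index < cstart come first, then the whole diagonal
         (A) block, then the remaining B columns; imark records where the B part splits. */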
1791       PetscInt imark = -1;
1792       if (v) {
1793         *v = v_p = mat->rowvalues;
1794         for (i=0; i<nzB; i++) {
1795           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1796           else break;
1797         }
1798         imark = i;
1799         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1800         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1801       }
1802       if (idx) {
1803         *idx = idx_p = mat->rowindices;
1804         if (imark > -1) {
1805           for (i=0; i<imark; i++) {
1806             idx_p[i] = cmap[cworkB[i]];
1807           }
1808         } else {
1809           for (i=0; i<nzB; i++) {
1810             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1811             else break;
1812           }
1813           imark = i;
1814         }
1815         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1816         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1817       }
1818     } else {
1819       if (idx) *idx = 0;
1820       if (v)   *v   = 0;
1821     }
1822   }
1823   *nz  = nztot;
1824   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1825   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1826   PetscFunctionReturn(0);
1827 }
1828 
1829 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1830 {
1831   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1832 
1833   PetscFunctionBegin;
1834   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1835   aij->getrowactive = PETSC_FALSE;
1836   PetscFunctionReturn(0);
1837 }
1838 
1839 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1840 {
1841   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1842   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1843   PetscErrorCode ierr;
1844   PetscInt       i,j,cstart = mat->cmap->rstart;
1845   PetscReal      sum = 0.0;
1846   MatScalar      *v;
1847 
1848   PetscFunctionBegin;
1849   if (aij->size == 1) {
1850     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1851   } else {
1852     if (type == NORM_FROBENIUS) {
1853       v = amat->a;
1854       for (i=0; i<amat->nz; i++) {
1855         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1856       }
1857       v = bmat->a;
1858       for (i=0; i<bmat->nz; i++) {
1859         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1860       }
1861       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1862       *norm = PetscSqrtReal(*norm);
1863       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1864     } else if (type == NORM_1) { /* max column norm */
1865       PetscReal *tmp,*tmp2;
1866       PetscInt  *jj,*garray = aij->garray;
1867       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1868       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1869       *norm = 0.0;
1870       v     = amat->a; jj = amat->j;
1871       for (j=0; j<amat->nz; j++) {
1872         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1873       }
1874       v = bmat->a; jj = bmat->j;
1875       for (j=0; j<bmat->nz; j++) {
1876         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1877       }
1878       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1879       for (j=0; j<mat->cmap->N; j++) {
1880         if (tmp2[j] > *norm) *norm = tmp2[j];
1881       }
1882       ierr = PetscFree(tmp);CHKERRQ(ierr);
1883       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1884       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1885     } else if (type == NORM_INFINITY) { /* max row norm */
1886       PetscReal ntemp = 0.0;
1887       for (j=0; j<aij->A->rmap->n; j++) {
1888         v   = amat->a + amat->i[j];
1889         sum = 0.0;
1890         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1891           sum += PetscAbsScalar(*v); v++;
1892         }
1893         v = bmat->a + bmat->i[j];
1894         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1895           sum += PetscAbsScalar(*v); v++;
1896         }
1897         if (sum > ntemp) ntemp = sum;
1898       }
1899       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1900       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1901     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1902   }
1903   PetscFunctionReturn(0);
1904 }
1905 
1906 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1907 {
1908   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1909   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1910   PetscErrorCode ierr;
1911   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1912   PetscInt       cstart = A->cmap->rstart,ncol;
1913   Mat            B;
1914   MatScalar      *array;
1915 
1916   PetscFunctionBegin;
1917   if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1918 
1919   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1920   ai = Aloc->i; aj = Aloc->j;
1921   bi = Bloc->i; bj = Bloc->j;
1922   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1923     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1924     PetscSFNode          *oloc;
1925     PETSC_UNUSED PetscSF sf;
1926 
1927     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1928     /* compute d_nnz for preallocation */
1929     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1930     for (i=0; i<ai[ma]; i++) {
1931       d_nnz[aj[i]]++;
1932       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1933     }
1934     /* compute local off-diagonal contributions */
1935     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1936     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1937     /* map those to global */
1938     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1939     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1940     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1941     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1942     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1943     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1944     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1945 
1946     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1947     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1948     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1949     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1950     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1951     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1952   } else {
1953     B    = *matout;
1954     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1955     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1956   }
1957 
1958   /* copy over the A part */
1959   array = Aloc->a;
1960   row   = A->rmap->rstart;
1961   for (i=0; i<ma; i++) {
1962     ncol = ai[i+1]-ai[i];
1963     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1964     row++;
1965     array += ncol; aj += ncol;
1966   }
1967   aj = Aloc->j;
1968   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
1969 
1970   /* copy over the B part */
1971   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
1972   array = Bloc->a;
1973   row   = A->rmap->rstart;
1974   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1975   cols_tmp = cols;
1976   for (i=0; i<mb; i++) {
1977     ncol = bi[i+1]-bi[i];
1978     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1979     row++;
1980     array += ncol; cols_tmp += ncol;
1981   }
1982   ierr = PetscFree(cols);CHKERRQ(ierr);
1983 
1984   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1985   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1986   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1987     *matout = B;
1988   } else {
1989     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
1990   }
1991   PetscFunctionReturn(0);
1992 }
1993 
1994 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1995 {
1996   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1997   Mat            a    = aij->A,b = aij->B;
1998   PetscErrorCode ierr;
1999   PetscInt       s1,s2,s3;
2000 
2001   PetscFunctionBegin;
2002   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2003   if (rr) {
2004     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2005     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2006     /* Overlap communication with computation. */
2007     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2008   }
2009   if (ll) {
2010     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2011     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2012     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2013   }
2014   /* scale  the diagonal block */
2015   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2016 
2017   if (rr) {
2018     /* Do a scatter end and then right scale the off-diagonal block */
2019     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2020     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2021   }
2022   PetscFunctionReturn(0);
2023 }
2024 
2025 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2026 {
2027   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2028   PetscErrorCode ierr;
2029 
2030   PetscFunctionBegin;
2031   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2032   PetscFunctionReturn(0);
2033 }
2034 
2035 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2036 {
2037   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2038   Mat            a,b,c,d;
2039   PetscBool      flg;
2040   PetscErrorCode ierr;
2041 
2042   PetscFunctionBegin;
2043   a = matA->A; b = matA->B;
2044   c = matB->A; d = matB->B;
2045 
2046   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2047   if (flg) {
2048     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2049   }
2050   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2051   PetscFunctionReturn(0);
2052 }
2053 
2054 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2055 {
2056   PetscErrorCode ierr;
2057   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2058   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2059 
2060   PetscFunctionBegin;
2061   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2062   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2063     /* because of the column compression in the off-processor part of the matrix a->B,
2064        the number of columns in a->B and b->B may be different, hence we cannot call
2065        MatCopy() directly on the two parts. If need be, a copy more efficient than
2066        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
2067        then copying the submatrices */
2068     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2069   } else {
2070     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2071     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2072   }
2073   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2074   PetscFunctionReturn(0);
2075 }
2076 
2077 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2078 {
2079   PetscErrorCode ierr;
2080 
2081   PetscFunctionBegin;
2082   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2083   PetscFunctionReturn(0);
2084 }
2085 
2086 /*
2087    Computes the number of nonzeros per row needed for preallocation when X and Y
2088    have different nonzero structure.
2089 */
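/*
   For example (a sketch): if row i of X has global columns {0,3,7} and row i of Y has
   global columns {3,5}, the union is {0,3,5,7} and nnz[i] = 4; a column present in both
   (here 3) is counted only once.
*/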
2090 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2091 {
2092   PetscInt       i,j,k,nzx,nzy;
2093 
2094   PetscFunctionBegin;
2095   /* Set the number of nonzeros in the new matrix */
2096   for (i=0; i<m; i++) {
2097     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2098     nzx = xi[i+1] - xi[i];
2099     nzy = yi[i+1] - yi[i];
2100     nnz[i] = 0;
2101     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2102       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2103       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2104       nnz[i]++;
2105     }
2106     for (; k<nzy; k++) nnz[i]++;
2107   }
2108   PetscFunctionReturn(0);
2109 }
2110 
2111 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2112 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2113 {
2114   PetscErrorCode ierr;
2115   PetscInt       m = Y->rmap->N;
2116   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2117   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2118 
2119   PetscFunctionBegin;
2120   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2121   PetscFunctionReturn(0);
2122 }
2123 
2124 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2125 {
2126   PetscErrorCode ierr;
2127   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2128   PetscBLASInt   bnz,one=1;
2129   Mat_SeqAIJ     *x,*y;
2130 
2131   PetscFunctionBegin;
2132   if (str == SAME_NONZERO_PATTERN) {
2133     PetscScalar alpha = a;
2134     x    = (Mat_SeqAIJ*)xx->A->data;
2135     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2136     y    = (Mat_SeqAIJ*)yy->A->data;
2137     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2138     x    = (Mat_SeqAIJ*)xx->B->data;
2139     y    = (Mat_SeqAIJ*)yy->B->data;
2140     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2141     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2142     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2143   } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzeros of X are a subset of Y's */
2144     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2145   } else {
2146     Mat      B;
2147     PetscInt *nnz_d,*nnz_o;
2148     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2149     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2150     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2151     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2152     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2153     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2154     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2155     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2156     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2157     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2158     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2159     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2160     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2161     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2162   }
2163   PetscFunctionReturn(0);
2164 }
2165 
2166 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2167 
2168 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2169 {
2170 #if defined(PETSC_USE_COMPLEX)
2171   PetscErrorCode ierr;
2172   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2173 
2174   PetscFunctionBegin;
2175   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2176   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2177 #else
2178   PetscFunctionBegin;
2179 #endif
2180   PetscFunctionReturn(0);
2181 }
2182 
2183 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2184 {
2185   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2186   PetscErrorCode ierr;
2187 
2188   PetscFunctionBegin;
2189   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2190   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2191   PetscFunctionReturn(0);
2192 }
2193 
2194 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2195 {
2196   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2197   PetscErrorCode ierr;
2198 
2199   PetscFunctionBegin;
2200   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2201   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2202   PetscFunctionReturn(0);
2203 }
2204 
2205 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2206 {
2207   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2208   PetscErrorCode ierr;
2209   PetscInt       i,*idxb = 0;
2210   PetscScalar    *va,*vb;
2211   Vec            vtmp;
2212 
2213   PetscFunctionBegin;
2214   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2215   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2216   if (idx) {
2217     for (i=0; i<A->rmap->n; i++) {
2218       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2219     }
2220   }
2221 
2222   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2223   if (idx) {
2224     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2225   }
2226   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2227   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2228 
2229   for (i=0; i<A->rmap->n; i++) {
2230     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2231       va[i] = vb[i];
2232       if (idx) idx[i] = a->garray[idxb[i]];
2233     }
2234   }
2235 
2236   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2237   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2238   ierr = PetscFree(idxb);CHKERRQ(ierr);
2239   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2240   PetscFunctionReturn(0);
2241 }
2242 
2243 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2244 {
2245   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2246   PetscErrorCode ierr;
2247   PetscInt       i,*idxb = 0;
2248   PetscScalar    *va,*vb;
2249   Vec            vtmp;
2250 
2251   PetscFunctionBegin;
2252   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2253   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2254   if (idx) {
2255     for (i=0; i<A->rmap->n; i++) {
2256       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2257     }
2258   }
2259 
2260   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2261   if (idx) {
2262     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2263   }
2264   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2265   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2266 
2267   for (i=0; i<A->rmap->n; i++) {
2268     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2269       va[i] = vb[i];
2270       if (idx) idx[i] = a->garray[idxb[i]];
2271     }
2272   }
2273 
2274   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2275   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2276   ierr = PetscFree(idxb);CHKERRQ(ierr);
2277   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2278   PetscFunctionReturn(0);
2279 }
2280 
2281 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2282 {
2283   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2284   PetscInt       n      = A->rmap->n;
2285   PetscInt       cstart = A->cmap->rstart;
2286   PetscInt       *cmap  = mat->garray;
2287   PetscInt       *diagIdx, *offdiagIdx;
2288   Vec            diagV, offdiagV;
2289   PetscScalar    *a, *diagA, *offdiagA;
2290   PetscInt       r;
2291   PetscErrorCode ierr;
2292 
2293   PetscFunctionBegin;
2294   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2295   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2296   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2297   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2298   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2299   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2300   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2301   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2302   for (r = 0; r < n; ++r) {
2303     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2304       a[r]   = diagA[r];
2305       idx[r] = cstart + diagIdx[r];
2306     } else {
2307       a[r]   = offdiagA[r];
2308       idx[r] = cmap[offdiagIdx[r]];
2309     }
2310   }
2311   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2312   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2313   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2314   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2315   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2316   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2317   PetscFunctionReturn(0);
2318 }
2319 
2320 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2321 {
2322   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2323   PetscInt       n      = A->rmap->n;
2324   PetscInt       cstart = A->cmap->rstart;
2325   PetscInt       *cmap  = mat->garray;
2326   PetscInt       *diagIdx, *offdiagIdx;
2327   Vec            diagV, offdiagV;
2328   PetscScalar    *a, *diagA, *offdiagA;
2329   PetscInt       r;
2330   PetscErrorCode ierr;
2331 
2332   PetscFunctionBegin;
2333   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2334   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2335   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2336   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2337   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2338   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2339   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2340   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2341   for (r = 0; r < n; ++r) {
2342     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2343       a[r]   = diagA[r];
2344       idx[r] = cstart + diagIdx[r];
2345     } else {
2346       a[r]   = offdiagA[r];
2347       idx[r] = cmap[offdiagIdx[r]];
2348     }
2349   }
2350   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2351   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2352   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2353   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2354   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2355   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2356   PetscFunctionReturn(0);
2357 }
2358 
2359 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2360 {
2361   PetscErrorCode ierr;
2362   Mat            *dummy;
2363 
2364   PetscFunctionBegin;
2365   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2366   *newmat = *dummy;
2367   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2368   PetscFunctionReturn(0);
2369 }
2370 
2371 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2372 {
2373   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2374   PetscErrorCode ierr;
2375 
2376   PetscFunctionBegin;
2377   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2378   A->factorerrortype = a->A->factorerrortype;
2379   PetscFunctionReturn(0);
2380 }
2381 
2382 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2383 {
2384   PetscErrorCode ierr;
2385   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2386 
2387   PetscFunctionBegin;
2388   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2389   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2390   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2391   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2392   PetscFunctionReturn(0);
2393 }
2394 
2395 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2396 {
2397   PetscFunctionBegin;
2398   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2399   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2400   PetscFunctionReturn(0);
2401 }
2402 
2403 /*@
2404    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap
2405 
2406    Collective on Mat
2407 
2408    Input Parameters:
2409 +    A - the matrix
2410 -    sc - PETSC_TRUE to use the scalable algorithm (the default is PETSC_FALSE, i.e. the non-scalable algorithm)
2411 
2412  Level: advanced
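
   Example Usage:
   A minimal sketch, assuming A is an assembled MATMPIAIJ matrix; subsequent calls to
   MatIncreaseOverlap() on A then use the scalable algorithm.
.vb
   ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
.ve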
2413 
2414 @*/
2415 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2416 {
2417   PetscErrorCode       ierr;
2418 
2419   PetscFunctionBegin;
2420   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2421   PetscFunctionReturn(0);
2422 }
2423 
2424 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2425 {
2426   PetscErrorCode       ierr;
2427   PetscBool            sc = PETSC_FALSE,flg;
2428 
2429   PetscFunctionBegin;
2430   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2431   ierr = PetscObjectOptionsBegin((PetscObject)A);
2432     if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2433     ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2434     if (flg) {
2435       ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2436     }
2437   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2438   PetscFunctionReturn(0);
2439 }
2440 
2441 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2442 {
2443   PetscErrorCode ierr;
2444   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2445   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2446 
2447   PetscFunctionBegin;
2448   if (!Y->preallocated) {
2449     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2450   } else if (!aij->nz) {
2451     PetscInt nonew = aij->nonew;
2452     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2453     aij->nonew = nonew;
2454   }
2455   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2456   PetscFunctionReturn(0);
2457 }
2458 
2459 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2460 {
2461   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2462   PetscErrorCode ierr;
2463 
2464   PetscFunctionBegin;
2465   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2466   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2467   if (d) {
2468     PetscInt rstart;
2469     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2470     *d += rstart;
2471 
2472   }
2473   PetscFunctionReturn(0);
2474 }
2475 
2476 
2477 /* -------------------------------------------------------------------*/
2478 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2479                                        MatGetRow_MPIAIJ,
2480                                        MatRestoreRow_MPIAIJ,
2481                                        MatMult_MPIAIJ,
2482                                 /* 4*/ MatMultAdd_MPIAIJ,
2483                                        MatMultTranspose_MPIAIJ,
2484                                        MatMultTransposeAdd_MPIAIJ,
2485                                        0,
2486                                        0,
2487                                        0,
2488                                 /*10*/ 0,
2489                                        0,
2490                                        0,
2491                                        MatSOR_MPIAIJ,
2492                                        MatTranspose_MPIAIJ,
2493                                 /*15*/ MatGetInfo_MPIAIJ,
2494                                        MatEqual_MPIAIJ,
2495                                        MatGetDiagonal_MPIAIJ,
2496                                        MatDiagonalScale_MPIAIJ,
2497                                        MatNorm_MPIAIJ,
2498                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2499                                        MatAssemblyEnd_MPIAIJ,
2500                                        MatSetOption_MPIAIJ,
2501                                        MatZeroEntries_MPIAIJ,
2502                                 /*24*/ MatZeroRows_MPIAIJ,
2503                                        0,
2504                                        0,
2505                                        0,
2506                                        0,
2507                                 /*29*/ MatSetUp_MPIAIJ,
2508                                        0,
2509                                        0,
2510                                        MatGetDiagonalBlock_MPIAIJ,
2511                                        0,
2512                                 /*34*/ MatDuplicate_MPIAIJ,
2513                                        0,
2514                                        0,
2515                                        0,
2516                                        0,
2517                                 /*39*/ MatAXPY_MPIAIJ,
2518                                        MatCreateSubMatrices_MPIAIJ,
2519                                        MatIncreaseOverlap_MPIAIJ,
2520                                        MatGetValues_MPIAIJ,
2521                                        MatCopy_MPIAIJ,
2522                                 /*44*/ MatGetRowMax_MPIAIJ,
2523                                        MatScale_MPIAIJ,
2524                                        MatShift_MPIAIJ,
2525                                        MatDiagonalSet_MPIAIJ,
2526                                        MatZeroRowsColumns_MPIAIJ,
2527                                 /*49*/ MatSetRandom_MPIAIJ,
2528                                        0,
2529                                        0,
2530                                        0,
2531                                        0,
2532                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2533                                        0,
2534                                        MatSetUnfactored_MPIAIJ,
2535                                        MatPermute_MPIAIJ,
2536                                        0,
2537                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2538                                        MatDestroy_MPIAIJ,
2539                                        MatView_MPIAIJ,
2540                                        0,
2541                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2542                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2543                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2544                                        0,
2545                                        0,
2546                                        0,
2547                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2548                                        MatGetRowMinAbs_MPIAIJ,
2549                                        0,
2550                                        0,
2551                                        0,
2552                                        0,
2553                                 /*75*/ MatFDColoringApply_AIJ,
2554                                        MatSetFromOptions_MPIAIJ,
2555                                        0,
2556                                        0,
2557                                        MatFindZeroDiagonals_MPIAIJ,
2558                                 /*80*/ 0,
2559                                        0,
2560                                        0,
2561                                 /*83*/ MatLoad_MPIAIJ,
2562                                        MatIsSymmetric_MPIAIJ,
2563                                        0,
2564                                        0,
2565                                        0,
2566                                        0,
2567                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2568                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2569                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2570                                        MatPtAP_MPIAIJ_MPIAIJ,
2571                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2572                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2573                                        0,
2574                                        0,
2575                                        0,
2576                                        0,
2577                                 /*99*/ 0,
2578                                        0,
2579                                        0,
2580                                        MatConjugate_MPIAIJ,
2581                                        0,
2582                                 /*104*/MatSetValuesRow_MPIAIJ,
2583                                        MatRealPart_MPIAIJ,
2584                                        MatImaginaryPart_MPIAIJ,
2585                                        0,
2586                                        0,
2587                                 /*109*/0,
2588                                        0,
2589                                        MatGetRowMin_MPIAIJ,
2590                                        0,
2591                                        MatMissingDiagonal_MPIAIJ,
2592                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2593                                        0,
2594                                        MatGetGhosts_MPIAIJ,
2595                                        0,
2596                                        0,
2597                                 /*119*/0,
2598                                        0,
2599                                        0,
2600                                        0,
2601                                        MatGetMultiProcBlock_MPIAIJ,
2602                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2603                                        MatGetColumnNorms_MPIAIJ,
2604                                        MatInvertBlockDiagonal_MPIAIJ,
2605                                        0,
2606                                        MatCreateSubMatricesMPI_MPIAIJ,
2607                                 /*129*/0,
2608                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2609                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2610                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2611                                        0,
2612                                 /*134*/0,
2613                                        0,
2614                                        MatRARt_MPIAIJ_MPIAIJ,
2615                                        0,
2616                                        0,
2617                                 /*139*/MatSetBlockSizes_MPIAIJ,
2618                                        0,
2619                                        0,
2620                                        MatFDColoringSetUp_MPIXAIJ,
2621                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2622                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2623 };
2624 
2625 /* ----------------------------------------------------------------------------------------*/
2626 
2627 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2628 {
2629   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2630   PetscErrorCode ierr;
2631 
2632   PetscFunctionBegin;
2633   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2634   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2635   PetscFunctionReturn(0);
2636 }
2637 
2638 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2639 {
2640   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2641   PetscErrorCode ierr;
2642 
2643   PetscFunctionBegin;
2644   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2645   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2646   PetscFunctionReturn(0);
2647 }
2648 
2649 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2650 {
2651   Mat_MPIAIJ     *b;
2652   PetscErrorCode ierr;
2653 
2654   PetscFunctionBegin;
2655   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2656   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2657   b = (Mat_MPIAIJ*)B->data;
2658 
2659 #if defined(PETSC_USE_CTABLE)
2660   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2661 #else
2662   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2663 #endif
2664   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2665   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2666   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2667 
2668   /* Because B's size may change with the new preallocation, we simply destroy it and create a new one each time */
2669   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2670   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2671   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2672   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2673   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2674   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2675 
2676   if (!B->preallocated) {
2677     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2678     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2679     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2680     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2681     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2682   }
2683 
2684   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2685   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2686   B->preallocated  = PETSC_TRUE;
2687   B->was_assembled = PETSC_FALSE;
2688   B->assembled     = PETSC_FALSE;
2689   PetscFunctionReturn(0);
2690 }
2691 
2692 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2693 {
2694   Mat_MPIAIJ     *b;
2695   PetscErrorCode ierr;
2696 
2697   PetscFunctionBegin;
2698   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2699   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2700   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2701   b = (Mat_MPIAIJ*)B->data;
2702 
2703 #if defined(PETSC_USE_CTABLE)
2704   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2705 #else
2706   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2707 #endif
2708   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2709   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2710   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2711 
2712   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2713   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2714   B->preallocated  = PETSC_TRUE;
2715   B->was_assembled = PETSC_FALSE;
2716   B->assembled = PETSC_FALSE;
2717   PetscFunctionReturn(0);
2718 }
2719 
2720 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2721 {
2722   Mat            mat;
2723   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2724   PetscErrorCode ierr;
2725 
2726   PetscFunctionBegin;
2727   *newmat = 0;
2728   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2729   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2730   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2731   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2732   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2733   a       = (Mat_MPIAIJ*)mat->data;
2734 
2735   mat->factortype   = matin->factortype;
2736   mat->assembled    = PETSC_TRUE;
2737   mat->insertmode   = NOT_SET_VALUES;
2738   mat->preallocated = PETSC_TRUE;
2739 
2740   a->size         = oldmat->size;
2741   a->rank         = oldmat->rank;
2742   a->donotstash   = oldmat->donotstash;
2743   a->roworiented  = oldmat->roworiented;
2744   a->rowindices   = 0;
2745   a->rowvalues    = 0;
2746   a->getrowactive = PETSC_FALSE;
2747 
2748   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2749   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2750 
2751   if (oldmat->colmap) {
2752 #if defined(PETSC_USE_CTABLE)
2753     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2754 #else
2755     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2756     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2757     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2758 #endif
2759   } else a->colmap = 0;
2760   if (oldmat->garray) {
2761     PetscInt len;
2762     len  = oldmat->B->cmap->n;
2763     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2764     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2765     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2766   } else a->garray = 0;
2767 
2768   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2769   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2770   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2771   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2772   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2773   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2774   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2775   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2776   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2777   *newmat = mat;
2778   PetscFunctionReturn(0);
2779 }
2780 
2781 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2782 {
2783   PetscScalar    *vals,*svals;
2784   MPI_Comm       comm;
2785   PetscErrorCode ierr;
2786   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2787   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2788   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2789   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2790   PetscInt       cend,cstart,n,*rowners;
2791   int            fd;
2792   PetscInt       bs = newMat->rmap->bs;
2793 
2794   PetscFunctionBegin;
2795   /* force binary viewer to load .info file if it has not yet done so */
2796   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2797   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2798   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2799   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2800   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2801   if (!rank) {
2802     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2803     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2804     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2805   }
2806 
2807   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2808   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2809   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2810   if (bs < 0) bs = 1;
2811 
2812   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2813   M    = header[1]; N = header[2];
2814 
2815   /* If global sizes are set, check if they are consistent with that given in the file */
2816   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2817   if (newMat->cmap->N >= 0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2818 
2819   /* determine ownership of all (block) rows */
2820   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%D) and block size (%D)",M,bs);
2821   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2822   else m = newMat->rmap->n; /* Set by user */
2823 
2824   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2825   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2826 
2827   /* First process needs enough room for process with most rows */
2828   if (!rank) {
2829     mmax = rowners[1];
2830     for (i=2; i<=size; i++) {
2831       mmax = PetscMax(mmax, rowners[i]);
2832     }
2833   } else mmax = -1;             /* unused, but compilers complain */
2834 
2835   rowners[0] = 0;
2836   for (i=2; i<=size; i++) {
2837     rowners[i] += rowners[i-1];
2838   }
2839   rstart = rowners[rank];
2840   rend   = rowners[rank+1];
2841 
2842   /* distribute row lengths to all processors */
2843   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2844   if (!rank) {
2845     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2846     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2847     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2848     for (j=0; j<m; j++) {
2849       procsnz[0] += ourlens[j];
2850     }
2851     for (i=1; i<size; i++) {
2852       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2853       /* calculate the number of nonzeros on each processor */
2854       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2855         procsnz[i] += rowlengths[j];
2856       }
2857       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2858     }
2859     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2860   } else {
2861     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2862   }
2863 
2864   if (!rank) {
2865     /* determine max buffer needed and allocate it */
2866     maxnz = 0;
2867     for (i=0; i<size; i++) {
2868       maxnz = PetscMax(maxnz,procsnz[i]);
2869     }
2870     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2871 
2872     /* read in my part of the matrix column indices  */
2873     nz   = procsnz[0];
2874     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2875     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2876 
2877     /* read in everyone else's parts and ship them off */
2878     for (i=1; i<size; i++) {
2879       nz   = procsnz[i];
2880       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2881       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2882     }
2883     ierr = PetscFree(cols);CHKERRQ(ierr);
2884   } else {
2885     /* determine buffer space needed for message */
2886     nz = 0;
2887     for (i=0; i<m; i++) {
2888       nz += ourlens[i];
2889     }
2890     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2891 
2892     /* receive message of column indices*/
2893     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2894   }
2895 
2896   /* determine column ownership if matrix is not square */
2897   if (N != M) {
2898     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2899     else n = newMat->cmap->n;
2900     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2901     cstart = cend - n;
2902   } else {
2903     cstart = rstart;
2904     cend   = rend;
2905     n      = cend - cstart;
2906   }
2907 
2908   /* loop over local rows, determining number of off diagonal entries */
2909   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2910   jj   = 0;
2911   for (i=0; i<m; i++) {
2912     for (j=0; j<ourlens[i]; j++) {
2913       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2914       jj++;
2915     }
2916   }
2917 
2918   for (i=0; i<m; i++) {
2919     ourlens[i] -= offlens[i];
2920   }
2921   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2922 
2923   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2924 
2925   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2926 
2927   for (i=0; i<m; i++) {
2928     ourlens[i] += offlens[i];
2929   }
2930 
2931   if (!rank) {
2932     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
2933 
2934     /* read in my part of the matrix numerical values  */
2935     nz   = procsnz[0];
2936     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2937 
2938     /* insert into matrix */
2939     jj      = rstart;
2940     smycols = mycols;
2941     svals   = vals;
2942     for (i=0; i<m; i++) {
2943       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2944       smycols += ourlens[i];
2945       svals   += ourlens[i];
2946       jj++;
2947     }
2948 
2949     /* read in other processors and ship out */
2950     for (i=1; i<size; i++) {
2951       nz   = procsnz[i];
2952       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2953       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2954     }
2955     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2956   } else {
2957     /* receive numeric values */
2958     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
2959 
2960     /* receive message of values*/
2961     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2962 
2963     /* insert into matrix */
2964     jj      = rstart;
2965     smycols = mycols;
2966     svals   = vals;
2967     for (i=0; i<m; i++) {
2968       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2969       smycols += ourlens[i];
2970       svals   += ourlens[i];
2971       jj++;
2972     }
2973   }
2974   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2975   ierr = PetscFree(vals);CHKERRQ(ierr);
2976   ierr = PetscFree(mycols);CHKERRQ(ierr);
2977   ierr = PetscFree(rowners);CHKERRQ(ierr);
2978   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2979   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2980   PetscFunctionReturn(0);
2981 }
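
/*
   A minimal usage sketch (editorial illustration, not part of the original source) of how the
   loader above is normally reached through the public MatLoad() interface with a binary viewer;
   the file name "matrix.dat" is a placeholder:

       Mat            A;
       PetscViewer    viewer;
       PetscErrorCode ierr;

       ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
       ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
       ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
       ierr = MatLoad(A,viewer);CHKERRQ(ierr);
       ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
       ...
       ierr = MatDestroy(&A);CHKERRQ(ierr);
*/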
2982 
2983 /* Not scalable because of ISAllGather() unless getting all columns. */
2984 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
2985 {
2986   PetscErrorCode ierr;
2987   IS             iscol_local;
2988   PetscBool      isstride;
2989   PetscMPIInt    lisstride=0,gisstride;
2990 
2991   PetscFunctionBegin;
2992   /* check if we are grabbing all columns*/
2993   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
2994 
2995   if (isstride) {
2996     PetscInt  start,len,mstart,mlen;
2997     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
2998     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
2999     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3000     if (mstart == start && mlen-mstart == len) lisstride = 1;
3001   }
3002 
3003   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3004   if (gisstride) {
3005     PetscInt N;
3006     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3007     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3008     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3009     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3010   } else {
3011     PetscInt cbs;
3012     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3013     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3014     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3015   }
3016 
3017   *isseq = iscol_local;
3018   PetscFunctionReturn(0);
3019 }
3020 
3021 /*
3022  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3023  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3024 
3025  Input Parameters:
3026    mat - matrix
3027    isrow - parallel row index set; its local indices are a subset of local columns of mat,
3028            i.e., mat->rstart <= isrow[i] < mat->rend
3029    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3030            i.e., mat->cstart <= iscol[i] < mat->cend
3031  Output Parameter:
3032    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3033    iscol_o - sequential column index set for retrieving mat->B
3034    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3035  */
3036 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3037 {
3038   PetscErrorCode ierr;
3039   Vec            x,cmap;
3040   const PetscInt *is_idx;
3041   PetscScalar    *xarray,*cmaparray;
3042   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3043   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3044   Mat            B=a->B;
3045   Vec            lvec=a->lvec,lcmap;
3046   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3047   MPI_Comm       comm;
3048   VecScatter     Mvctx=a->Mvctx;
3049 
3050   PetscFunctionBegin;
3051   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3052   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3053 
3054   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3055   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3056   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3057   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3058   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3059 
3060   /* Get start indices */
3061   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3062   isstart -= ncols;
3063   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3064 
3065   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3066   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3067   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3068   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3069   for (i=0; i<ncols; i++) {
3070     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3071     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3072     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3073   }
3074   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3075   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3076   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3077 
3078   /* Get iscol_d */
3079   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3080   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3081   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3082 
3083   /* Get isrow_d */
3084   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3085   rstart = mat->rmap->rstart;
3086   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3087   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3088   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3089   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3090 
3091   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3092   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3093   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3094 
3095   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3096   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3097   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3098 
3099   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3100 
3101   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3102   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3103 
3104   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3105   /* off-process column indices */
3106   count = 0;
3107   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3108   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3109 
3110   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3111   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3112   for (i=0; i<Bn; i++) {
3113     if (PetscRealPart(xarray[i]) > -1.0) {
3114       idx[count]     = i;                   /* local column index in off-diagonal part B */
3115       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3116       count++;
3117     }
3118   }
3119   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3120   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3121 
3122   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3123   /* cannot ensure iscol_o has same blocksize as iscol! */
3124 
3125   ierr = PetscFree(idx);CHKERRQ(ierr);
3126   *garray = cmap1;
3127 
3128   ierr = VecDestroy(&x);CHKERRQ(ierr);
3129   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3130   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3131   PetscFunctionReturn(0);
3132 }
3133 
3134 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3135 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3136 {
3137   PetscErrorCode ierr;
3138   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3139   Mat            M = NULL;
3140   MPI_Comm       comm;
3141   IS             iscol_d,isrow_d,iscol_o;
3142   Mat            Asub = NULL,Bsub = NULL;
3143   PetscInt       n;
3144 
3145   PetscFunctionBegin;
3146   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3147 
3148   if (call == MAT_REUSE_MATRIX) {
3149     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3150     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3151     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3152 
3153     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3154     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3155 
3156     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3157     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3158 
3159     /* Update diagonal and off-diagonal portions of submat */
3160     asub = (Mat_MPIAIJ*)(*submat)->data;
3161     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3162     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3163     if (n) {
3164       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3165     }
3166     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3167     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3168 
3169   } else { /* call == MAT_INITIAL_MATRIX) */
3170     const PetscInt *garray;
3171     PetscInt        BsubN;
3172 
3173     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3174     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3175 
3176     /* Create local submatrices Asub and Bsub */
3177     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3178     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3179 
3180     /* Create submatrix M */
3181     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3182 
3183     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3184     asub = (Mat_MPIAIJ*)M->data;
3185 
3186     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3187     n = asub->B->cmap->N;
3188     if (BsubN > n) {
3189       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3190       const PetscInt *idx;
3191       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3192       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3193 
3194       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3195       j = 0;
3196       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3197       for (i=0; i<n; i++) {
3198         if (j >= BsubN) break;
3199         while (subgarray[i] > garray[j]) j++;
3200 
3201         if (subgarray[i] == garray[j]) {
3202           idx_new[i] = idx[j++];
3203         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3204       }
3205       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3206 
3207       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3208       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3209 
3210     } else if (BsubN < n) {
3211       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be smaller than those of B (%D)",BsubN,asub->B->cmap->N);
3212     }
3213 
3214     ierr = PetscFree(garray);CHKERRQ(ierr);
3215     *submat = M;
3216 
3217     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3218     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3219     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3220 
3221     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3222     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3223 
3224     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3225     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3226   }
3227   PetscFunctionReturn(0);
3228 }
3229 
3230 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3231 {
3232   PetscErrorCode ierr;
3233   IS             iscol_local=NULL,isrow_d;
3234   PetscInt       csize;
3235   PetscInt       n,i,j,start,end;
3236   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3237   MPI_Comm       comm;
3238 
3239   PetscFunctionBegin;
3240   /* If isrow has same processor distribution as mat,
3241      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3242   if (call == MAT_REUSE_MATRIX) {
3243     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3244     if (isrow_d) {
3245       sameRowDist  = PETSC_TRUE;
3246       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3247     } else {
3248       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3249       if (iscol_local) {
3250         sameRowDist  = PETSC_TRUE;
3251         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3252       }
3253     }
3254   } else {
3255     /* Check if isrow has same processor distribution as mat */
3256     sameDist[0] = PETSC_FALSE;
3257     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3258     if (!n) {
3259       sameDist[0] = PETSC_TRUE;
3260     } else {
3261       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3262       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3263       if (i >= start && j < end) {
3264         sameDist[0] = PETSC_TRUE;
3265       }
3266     }
3267 
3268     /* Check if iscol has same processor distribution as mat */
3269     sameDist[1] = PETSC_FALSE;
3270     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3271     if (!n) {
3272       sameDist[1] = PETSC_TRUE;
3273     } else {
3274       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3275       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3276       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3277     }
3278 
3279     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3280     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3281     sameRowDist = tsameDist[0];
3282   }
3283 
3284   if (sameRowDist) {
3285     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3286       /* isrow and iscol have same processor distribution as mat */
3287       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3288       PetscFunctionReturn(0);
3289     } else { /* sameRowDist */
3290       /* isrow has same processor distribution as mat */
3291       if (call == MAT_INITIAL_MATRIX) {
3292         PetscBool sorted;
3293         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3294         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3295         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3296         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3297 
3298         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3299         if (sorted) {
3300           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3301           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3302           PetscFunctionReturn(0);
3303         }
3304       } else { /* call == MAT_REUSE_MATRIX */
3305         IS    iscol_sub;
3306         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3307         if (iscol_sub) {
3308           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3309           PetscFunctionReturn(0);
3310         }
3311       }
3312     }
3313   }
3314 
3315   /* General case: iscol -> iscol_local which has global size of iscol */
3316   if (call == MAT_REUSE_MATRIX) {
3317     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3318     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3319   } else {
3320     if (!iscol_local) {
3321       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3322     }
3323   }
3324 
3325   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3326   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3327 
3328   if (call == MAT_INITIAL_MATRIX) {
3329     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3330     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3331   }
3332   PetscFunctionReturn(0);
3333 }
3334 
3335 /*@C
3336      MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3337          and "off-diagonal" part of the matrix in CSR format.
3338 
3339    Collective on MPI_Comm
3340 
3341    Input Parameters:
3342 +  comm - MPI communicator
3343 .  A - "diagonal" portion of matrix
3344 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3345 -  garray - global index of B columns
3346 
3347    Output Parameter:
3348 .   mat - the matrix, with input A as its local diagonal matrix
3349    Level: advanced
3350 
3351    Notes:
3352        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3353        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3354 
3355 .seealso: MatCreateMPIAIJWithSplitArrays()
3356 @*/
3357 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3358 {
3359   PetscErrorCode ierr;
3360   Mat_MPIAIJ     *maij;
3361   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3362   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3363   PetscScalar    *oa=b->a;
3364   Mat            Bnew;
3365   PetscInt       m,n,N;
3366 
3367   PetscFunctionBegin;
3368   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3369   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3370   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3371   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3372   /* the check below is disabled: when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3373   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3374 
3375   /* Get global columns of mat */
3376   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3377 
3378   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3379   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3380   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3381   maij = (Mat_MPIAIJ*)(*mat)->data;
3382 
3383   (*mat)->preallocated = PETSC_TRUE;
3384 
3385   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3386   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3387 
3388   /* Set A as diagonal portion of *mat */
3389   maij->A = A;
3390 
3391   nz = oi[m];
3392   for (i=0; i<nz; i++) {
3393     col   = oj[i];
3394     oj[i] = garray[col];
3395   }
3396 
3397    /* Set Bnew as off-diagonal portion of *mat */
3398   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3399   bnew        = (Mat_SeqAIJ*)Bnew->data;
3400   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3401   maij->B     = Bnew;
3402 
3403   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3404 
3405   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3406   b->free_a       = PETSC_FALSE;
3407   b->free_ij      = PETSC_FALSE;
3408   ierr = MatDestroy(&B);CHKERRQ(ierr);
3409 
3410   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3411   bnew->free_a       = PETSC_TRUE;
3412   bnew->free_ij      = PETSC_TRUE;
3413 
3414   /* condense columns of maij->B */
3415   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3416   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3417   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3418   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3419   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3420   PetscFunctionReturn(0);
3421 }
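
/*
   A usage sketch for the routine above (editorial illustration, not part of the original source).
   Here Aloc and Bloc are assumed to be already-assembled MATSEQAIJ matrices holding the
   "diagonal" and "off-diagonal" local blocks, and garray[] maps each local column of Bloc to its
   global column index:

       Mat C;
       ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Aloc,Bloc,garray,&C);CHKERRQ(ierr);

   Per the manual page above, Aloc becomes part of C and Bloc is destroyed by the call, so the
   caller must not use either of them afterwards; C is freed with MatDestroy(&C) when done.
*/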
3422 
3423 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3424 
3425 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3426 {
3427   PetscErrorCode ierr;
3428   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3429   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3430   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3431   Mat            M,Msub,B=a->B;
3432   MatScalar      *aa;
3433   Mat_SeqAIJ     *aij;
3434   PetscInt       *garray = a->garray,*colsub,Ncols;
3435   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3436   IS             iscol_sub,iscmap;
3437   const PetscInt *is_idx,*cmap;
3438   PetscBool      allcolumns=PETSC_FALSE;
3439   MPI_Comm       comm;
3440 
3441   PetscFunctionBegin;
3442   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3443 
3444   if (call == MAT_REUSE_MATRIX) {
3445     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3446     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3447     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3448 
3449     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3450     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3451 
3452     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3453     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3454 
3455     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3456 
3457   } else { /* call == MAT_INITIAL_MATRIX) */
3458     PetscBool flg;
3459 
3460     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3461     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3462 
3463     /* (1) iscol -> nonscalable iscol_local */
3464     /* Check for special case: each processor gets entire matrix columns */
3465     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3466     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3467     if (allcolumns) {
3468       iscol_sub = iscol_local;
3469       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3470       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3471 
3472     } else {
3473       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3474       PetscInt *idx,*cmap1,k;
3475       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3476       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3477       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3478       count = 0;
3479       k     = 0;
3480       for (i=0; i<Ncols; i++) {
3481         j = is_idx[i];
3482         if (j >= cstart && j < cend) {
3483           /* diagonal part of mat */
3484           idx[count]     = j;
3485           cmap1[count++] = i; /* column index in submat */
3486         } else if (Bn) {
3487           /* off-diagonal part of mat */
3488           if (j == garray[k]) {
3489             idx[count]     = j;
3490             cmap1[count++] = i;  /* column index in submat */
3491           } else if (j > garray[k]) {
3492             while (j > garray[k] && k < Bn-1) k++;
3493             if (j == garray[k]) {
3494               idx[count]     = j;
3495               cmap1[count++] = i; /* column index in submat */
3496             }
3497           }
3498         }
3499       }
3500       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3501 
3502       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3503       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3504       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3505 
3506       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3507     }
3508 
3509     /* (3) Create sequential Msub */
3510     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3511   }
3512 
3513   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3514   aij  = (Mat_SeqAIJ*)(Msub)->data;
3515   ii   = aij->i;
3516   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3517 
3518   /*
3519       m - number of local rows
3520       Ncols - number of columns (same on all processors)
3521       rstart - first row in new global matrix generated
3522   */
3523   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3524 
3525   if (call == MAT_INITIAL_MATRIX) {
3526     /* (4) Create parallel newmat */
3527     PetscMPIInt    rank,size;
3528     PetscInt       csize;
3529 
3530     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3531     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3532 
3533     /*
3534         Determine the number of non-zeros in the diagonal and off-diagonal
3535         portions of the matrix in order to do correct preallocation
3536     */
3537 
3538     /* first get start and end of "diagonal" columns */
3539     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3540     if (csize == PETSC_DECIDE) {
3541       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3542       if (mglobal == Ncols) { /* square matrix */
3543         nlocal = m;
3544       } else {
3545         nlocal = Ncols/size + ((Ncols % size) > rank);
3546       }
3547     } else {
3548       nlocal = csize;
3549     }
3550     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3551     rstart = rend - nlocal;
3552     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3553 
3554     /* next, compute all the lengths */
3555     jj    = aij->j;
3556     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3557     olens = dlens + m;
3558     for (i=0; i<m; i++) {
3559       jend = ii[i+1] - ii[i];
3560       olen = 0;
3561       dlen = 0;
3562       for (j=0; j<jend; j++) {
3563         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3564         else dlen++;
3565         jj++;
3566       }
3567       olens[i] = olen;
3568       dlens[i] = dlen;
3569     }
3570 
3571     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3572     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3573 
3574     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3575     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3576     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3577     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3578     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3579     ierr = PetscFree(dlens);CHKERRQ(ierr);
3580 
3581   } else { /* call == MAT_REUSE_MATRIX */
3582     M    = *newmat;
3583     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3584     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3585     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3586     /*
3587          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3588        rather than the slower MatSetValues().
3589     */
3590     M->was_assembled = PETSC_TRUE;
3591     M->assembled     = PETSC_FALSE;
3592   }
3593 
3594   /* (5) Set values of Msub to *newmat */
3595   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3596   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3597 
3598   jj   = aij->j;
3599   aa   = aij->a;
3600   for (i=0; i<m; i++) {
3601     row = rstart + i;
3602     nz  = ii[i+1] - ii[i];
3603     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3604     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3605     jj += nz; aa += nz;
3606   }
3607   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3608 
3609   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3610   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3611 
3612   ierr = PetscFree(colsub);CHKERRQ(ierr);
3613 
3614   /* save Msub, iscol_sub and iscmap used in processor for next request */
3615   if (call ==  MAT_INITIAL_MATRIX) {
3616     *newmat = M;
3617     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3618     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3619 
3620     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3621     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3622 
3623     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3624     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3625 
3626     if (iscol_local) {
3627       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3628       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3629     }
3630   }
3631   PetscFunctionReturn(0);
3632 }
3633 
3634 /*
3635     Not great since it makes two copies of the submatrix: first a SeqAIJ
3636   on each process, then the end result by concatenating the local matrices.
3637   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3638 
3639   Note: This requires a sequential iscol with all indices.
3640 */
3641 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3642 {
3643   PetscErrorCode ierr;
3644   PetscMPIInt    rank,size;
3645   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3646   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3647   Mat            M,Mreuse;
3648   MatScalar      *aa,*vwork;
3649   MPI_Comm       comm;
3650   Mat_SeqAIJ     *aij;
3651   PetscBool      colflag,allcolumns=PETSC_FALSE;
3652 
3653   PetscFunctionBegin;
3654   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3655   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3656   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3657 
3658   /* Check for special case: each processor gets entire matrix columns */
3659   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3660   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3661   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3662 
3663   if (call ==  MAT_REUSE_MATRIX) {
3664     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3665     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3666     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3667   } else {
3668     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3669   }
3670 
3671   /*
3672       m - number of local rows
3673       n - number of columns (same on all processors)
3674       rstart - first row in new global matrix generated
3675   */
3676   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3677   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3678   if (call == MAT_INITIAL_MATRIX) {
3679     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3680     ii  = aij->i;
3681     jj  = aij->j;
3682 
3683     /*
3684         Determine the number of non-zeros in the diagonal and off-diagonal
3685         portions of the matrix in order to do correct preallocation
3686     */
3687 
3688     /* first get start and end of "diagonal" columns */
3689     if (csize == PETSC_DECIDE) {
3690       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3691       if (mglobal == n) { /* square matrix */
3692         nlocal = m;
3693       } else {
3694         nlocal = n/size + ((n % size) > rank);
3695       }
3696     } else {
3697       nlocal = csize;
3698     }
3699     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3700     rstart = rend - nlocal;
3701     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3702 
3703     /* next, compute all the lengths */
3704     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3705     olens = dlens + m;
3706     for (i=0; i<m; i++) {
3707       jend = ii[i+1] - ii[i];
3708       olen = 0;
3709       dlen = 0;
3710       for (j=0; j<jend; j++) {
3711         if (*jj < rstart || *jj >= rend) olen++;
3712         else dlen++;
3713         jj++;
3714       }
3715       olens[i] = olen;
3716       dlens[i] = dlen;
3717     }
3718     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3719     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3720     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3721     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3722     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3723     ierr = PetscFree(dlens);CHKERRQ(ierr);
3724   } else {
3725     PetscInt ml,nl;
3726 
3727     M    = *newmat;
3728     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3729     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3730     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3731     /*
3732          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3733        rather than the slower MatSetValues().
3734     */
3735     M->was_assembled = PETSC_TRUE;
3736     M->assembled     = PETSC_FALSE;
3737   }
3738   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3739   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3740   ii   = aij->i;
3741   jj   = aij->j;
3742   aa   = aij->a;
3743   for (i=0; i<m; i++) {
3744     row   = rstart + i;
3745     nz    = ii[i+1] - ii[i];
3746     cwork = jj;     jj += nz;
3747     vwork = aa;     aa += nz;
3748     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3749   }
3750 
3751   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3752   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3753   *newmat = M;
3754 
3755   /* save submatrix used in processor for next request */
3756   if (call ==  MAT_INITIAL_MATRIX) {
3757     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3758     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3759   }
3760   PetscFunctionReturn(0);
3761 }
3762 
3763 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3764 {
3765   PetscInt       m,cstart, cend,j,nnz,i,d;
3766   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3767   const PetscInt *JJ;
3768   PetscScalar    *values;
3769   PetscErrorCode ierr;
3770   PetscBool      nooffprocentries;
3771 
3772   PetscFunctionBegin;
3773   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3774 
3775   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3776   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3777   m      = B->rmap->n;
3778   cstart = B->cmap->rstart;
3779   cend   = B->cmap->rend;
3780   rstart = B->rmap->rstart;
3781 
3782   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3783 
3784 #if defined(PETSC_USE_DEBUG)
3785   for (i=0; i<m; i++) {
3786     nnz = Ii[i+1]- Ii[i];
3787     JJ  = J + Ii[i];
3788     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3789     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3790     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3791   }
3792 #endif
3793 
3794   for (i=0; i<m; i++) {
3795     nnz     = Ii[i+1]- Ii[i];
3796     JJ      = J + Ii[i];
3797     nnz_max = PetscMax(nnz_max,nnz);
3798     d       = 0;
3799     for (j=0; j<nnz; j++) {
3800       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3801     }
3802     d_nnz[i] = d;
3803     o_nnz[i] = nnz - d;
3804   }
3805   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3806   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3807 
3808   if (v) values = (PetscScalar*)v;
3809   else {
3810     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3811   }
3812 
3813   for (i=0; i<m; i++) {
3814     ii   = i + rstart;
3815     nnz  = Ii[i+1]- Ii[i];
3816     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3817   }
3818   nooffprocentries    = B->nooffprocentries;
3819   B->nooffprocentries = PETSC_TRUE;
3820   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3821   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3822   B->nooffprocentries = nooffprocentries;
3823 
3824   if (!v) {
3825     ierr = PetscFree(values);CHKERRQ(ierr);
3826   }
3827   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3828   PetscFunctionReturn(0);
3829 }
3830 
3831 /*@
3832    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3833    (the default parallel PETSc format).
3834 
3835    Collective on MPI_Comm
3836 
3837    Input Parameters:
3838 +  B - the matrix
3839 .  i - the indices into j for the start of each local row (starts with zero)
3840 .  j - the column indices for each local row (starts with zero)
3841 -  v - optional values in the matrix
3842 
3843    Level: developer
3844 
3845    Notes:
3846        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3847      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3848      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3849 
3850        The i and j indices are 0 based, and the i array contains the offsets into the local j (and v) arrays for the start of each row.
3851 
3852        The format used for the sparse matrix input is equivalent to a
3853     row-major ordering, i.e. for the following matrix, the expected input data is
3854     as shown below:
3855 
3856 $        1 0 0
3857 $        2 0 3     P0
3858 $       -------
3859 $        4 5 6     P1
3860 $
3861 $     Process0 [P0]: rows_owned=[0,1]
3862 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3863 $        j =  {0,0,2}  [size = 3]
3864 $        v =  {1,2,3}  [size = 3]
3865 $
3866 $     Process1 [P1]: rows_owned=[2]
3867 $        i =  {0,3}    [size = nrow+1  = 1+1]
3868 $        j =  {0,1,2}  [size = 3]
3869 $        v =  {4,5,6}  [size = 3]
3870 
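   A minimal calling sketch for the two-process example above (the array names
   ii, jj, and vv are illustrative, not part of the PETSc API):

.vb
     Mat         B;
     PetscInt    ii[] = {0,1,3};            /* on P0; P1 would use {0,3}         */
     PetscInt    jj[] = {0,0,2};            /* on P0; P1 would use {0,1,2}       */
     PetscScalar vv[] = {1.0,2.0,3.0};      /* on P0; P1 would use {4.0,5.0,6.0} */

     MatCreate(PETSC_COMM_WORLD,&B);
     MatSetSizes(B,2,PETSC_DECIDE,3,3);     /* P1 passes 1 local row */
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,ii,jj,vv);
.ve
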
3871 .keywords: matrix, aij, compressed row, sparse, parallel
3872 
3873 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3874           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3875 @*/
3876 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3877 {
3878   PetscErrorCode ierr;
3879 
3880   PetscFunctionBegin;
3881   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3882   PetscFunctionReturn(0);
3883 }
3884 
3885 /*@C
3886    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3887    (the default parallel PETSc format).  For good matrix assembly performance
3888    the user should preallocate the matrix storage by setting the parameters
3889    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3890    performance can be increased by more than a factor of 50.
3891 
3892    Collective on MPI_Comm
3893 
3894    Input Parameters:
3895 +  B - the matrix
3896 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3897            (same value is used for all local rows)
3898 .  d_nnz - array containing the number of nonzeros in the various rows of the
3899            DIAGONAL portion of the local submatrix (possibly different for each row)
3900            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3901            The size of this array is equal to the number of local rows, i.e 'm'.
3902            For matrices that will be factored, you must leave room for (and set)
3903            the diagonal entry even if it is zero.
3904 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3905            submatrix (same value is used for all local rows).
3906 -  o_nnz - array containing the number of nonzeros in the various rows of the
3907            OFF-DIAGONAL portion of the local submatrix (possibly different for
3908            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3909            structure. The size of this array is equal to the number
3910            of local rows, i.e 'm'.
3911 
3912    If the *_nnz parameter is given then the *_nz parameter is ignored
3913 
3914    The AIJ format (also called the Yale sparse matrix format or
3915    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3916    storage.  The stored row and column indices begin with zero.
3917    See Users-Manual: ch_mat for details.
3918 
3919    The parallel matrix is partitioned such that the first m0 rows belong to
3920    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3921    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
3922 
3923    The DIAGONAL portion of the local submatrix of a processor can be defined
3924    as the submatrix obtained by extracting the part corresponding to
3925    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3926    first row that belongs to the processor, r2 is the last row belonging to
3927    this processor, and c1-c2 is the range of indices of the local part of a
3928    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3929    common case of a square matrix, the row and column ranges are the same and
3930    the DIAGONAL part is also square. The remaining portion of the local
3931    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3932 
3933    If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
3934 
3935    You can call MatGetInfo() to get information on how effective the preallocation was;
3936    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3937    You can also run with the option -info and look for messages with the string
3938    malloc in them to see if additional memory allocation was needed.
3939 
3940    Example usage:
3941 
3942    Consider the following 8x8 matrix with 34 non-zero values, that is
3943    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3944    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3945    as follows:
3946 
3947 .vb
3948             1  2  0  |  0  3  0  |  0  4
3949     Proc0   0  5  6  |  7  0  0  |  8  0
3950             9  0 10  | 11  0  0  | 12  0
3951     -------------------------------------
3952            13  0 14  | 15 16 17  |  0  0
3953     Proc1   0 18  0  | 19 20 21  |  0  0
3954             0  0  0  | 22 23  0  | 24  0
3955     -------------------------------------
3956     Proc2  25 26 27  |  0  0 28  | 29  0
3957            30  0  0  | 31 32 33  |  0 34
3958 .ve
3959 
3960    This can be represented as a collection of submatrices as:
3961 
3962 .vb
3963       A B C
3964       D E F
3965       G H I
3966 .ve
3967 
3968    Where the submatrices A,B,C are owned by proc0, D,E,F are
3969    owned by proc1, G,H,I are owned by proc2.
3970 
3971    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3972    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3973    The 'M','N' parameters are 8,8, and have the same values on all procs.
3974 
3975    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3976    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3977    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3978    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3979    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3980    matrix, and [DF] as another SeqAIJ matrix.
3981 
3982    When d_nz, o_nz parameters are specified, d_nz storage elements are
3983    allocated for every row of the local diagonal submatrix, and o_nz
3984    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3985    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
3986    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3987    In this case, the values of d_nz,o_nz are:
3988 .vb
3989      proc0 : dnz = 2, o_nz = 2
3990      proc1 : dnz = 3, o_nz = 2
3991      proc2 : dnz = 1, o_nz = 4
3992 .ve
3993    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3994    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3995    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3996    34 values.
3997 
3998    When d_nnz, o_nnz parameters are specified, the storage is specified
3999    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4000    In the above case the values for d_nnz,o_nnz are:
4001 .vb
4002      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4003      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4004      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4005 .ve
4006    Here the space allocated is the sum of all the above values, i.e. 34, and
4007    hence the preallocation is exact.
4008 
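   As a minimal sketch, proc0 in the example above might preallocate with the
   per-row arrays listed there (error checking omitted; the other processes
   pass their own local sizes and arrays):

.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};   /* proc0 values from above */

     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,3,3,8,8);                          /* proc0: m = n = 3 */
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
     /* ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd() ... */
.ve
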
4009    Level: intermediate
4010 
4011 .keywords: matrix, aij, compressed row, sparse, parallel
4012 
4013 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4014           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4015 @*/
4016 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4017 {
4018   PetscErrorCode ierr;
4019 
4020   PetscFunctionBegin;
4021   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4022   PetscValidType(B,1);
4023   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4024   PetscFunctionReturn(0);
4025 }
4026 
4027 /*@
4028      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4029          CSR format the local rows.
4030 
4031    Collective on MPI_Comm
4032 
4033    Input Parameters:
4034 +  comm - MPI communicator
4035 .  m - number of local rows (Cannot be PETSC_DECIDE)
4036 .  n - This value should be the same as the local size used in creating the
4037        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4038        calculated if N is given) For square matrices n is almost always m.
4039 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4040 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4041 .   i - row indices
4042 .   j - column indices
4043 -   a - matrix values
4044 
4045    Output Parameter:
4046 .   mat - the matrix
4047 
4048    Level: intermediate
4049 
4050    Notes:
4051        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4052      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4053      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4054 
4055        The i and j indices are 0 based, and the i array contains the offsets into the local j (and a) arrays for the start of each row.
4056 
4057        The format used for the sparse matrix input is equivalent to a
4058     row-major ordering, i.e. for the following matrix, the expected input data is
4059     as shown below:
4060 
4061 $        1 0 0
4062 $        2 0 3     P0
4063 $       -------
4064 $        4 5 6     P1
4065 $
4066 $     Process0 [P0]: rows_owned=[0,1]
4067 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4068 $        j =  {0,0,2}  [size = 3]
4069 $        v =  {1,2,3}  [size = 3]
4070 $
4071 $     Process1 [P1]: rows_owned=[2]
4072 $        i =  {0,3}    [size = nrow+1  = 1+1]
4073 $        j =  {0,1,2}  [size = 3]
4074 $        v =  {4,5,6}  [size = 3]
4075 
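   A minimal calling sketch for the two-process example above (the array names
   ii, jj, and vv are illustrative, not part of the PETSc API):

.vb
     Mat         A;
     PetscInt    ii[] = {0,1,3};            /* on P0; P1 would use {0,3}         */
     PetscInt    jj[] = {0,0,2};            /* on P0; P1 would use {0,1,2}       */
     PetscScalar vv[] = {1.0,2.0,3.0};      /* on P0; P1 would use {4.0,5.0,6.0} */

     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,ii,jj,vv,&A);  /* P1 passes 1 local row */
.ve
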
4076 .keywords: matrix, aij, compressed row, sparse, parallel
4077 
4078 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4079           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4080 @*/
4081 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4082 {
4083   PetscErrorCode ierr;
4084 
4085   PetscFunctionBegin;
4086   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4087   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4088   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4089   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4090   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4091   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4092   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4093   PetscFunctionReturn(0);
4094 }
4095 
4096 /*@C
4097    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4098    (the default parallel PETSc format).  For good matrix assembly performance
4099    the user should preallocate the matrix storage by setting the parameters
4100    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4101    performance can be increased by more than a factor of 50.
4102 
4103    Collective on MPI_Comm
4104 
4105    Input Parameters:
4106 +  comm - MPI communicator
4107 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4108            This value should be the same as the local size used in creating the
4109            y vector for the matrix-vector product y = Ax.
4110 .  n - This value should be the same as the local size used in creating the
4111        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4112        calculated if N is given) For square matrices n is almost always m.
4113 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4114 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4115 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4116            (same value is used for all local rows)
4117 .  d_nnz - array containing the number of nonzeros in the various rows of the
4118            DIAGONAL portion of the local submatrix (possibly different for each row)
4119            or NULL, if d_nz is used to specify the nonzero structure.
4120            The size of this array is equal to the number of local rows, i.e 'm'.
4121 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4122            submatrix (same value is used for all local rows).
4123 -  o_nnz - array containing the number of nonzeros in the various rows of the
4124            OFF-DIAGONAL portion of the local submatrix (possibly different for
4125            each row) or NULL, if o_nz is used to specify the nonzero
4126            structure. The size of this array is equal to the number
4127            of local rows, i.e 'm'.
4128 
4129    Output Parameter:
4130 .  A - the matrix
4131 
4132    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4133    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4134    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4135 
4136    Notes:
4137    If the *_nnz parameter is given then the *_nz parameter is ignored
4138 
4139    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4140    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4141    storage requirements for this matrix.
4142 
4143    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4144    processor then it must be used on all processors that share the object for
4145    that argument.
4146 
4147    The user MUST specify either the local or global matrix dimensions
4148    (possibly both).
4149 
4150    The parallel matrix is partitioned across processors such that the
4151    first m0 rows belong to process 0, the next m1 rows belong to
4152    process 1, the next m2 rows belong to process 2, etc., where
4153    m0,m1,m2,... are given by the input parameter 'm', i.e. each processor stores
4154    values corresponding to an [m x N] submatrix.
4155 
4156    The columns are logically partitioned with the n0 columns belonging
4157    to 0th partition, the next n1 columns belonging to the next
4158    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4159 
4160    The DIAGONAL portion of the local submatrix on any given processor
4161    is the submatrix spanning the m rows and n columns owned by that
4162    processor, i.e. the diagonal matrix on process 0 is [m0 x n0],
4163    the diagonal matrix on process 1 is [m1 x n1],
4164    etc. The remaining portion of the local submatrix [m x (N-n)]
4165    constitutes the OFF-DIAGONAL portion. The example below
4166    illustrates this concept.
4167 
4168    For a square global matrix we define each processor's diagonal portion
4169    to be its local rows and the corresponding columns (a square submatrix);
4170    each processor's off-diagonal portion encompasses the remainder of the
4171    local matrix (a rectangular submatrix).
4172 
4173    If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
4174 
4175    When calling this routine with a single process communicator, a matrix of
4176    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4177    type of communicator, use the construction mechanism
4178 .vb
4179      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4180 .ve
4181 
4182 $     MatCreate(...,&A);
4183 $     MatSetType(A,MATMPIAIJ);
4184 $     MatSetSizes(A, m,n,M,N);
4185 $     MatMPIAIJSetPreallocation(A,...);
4186 
4187    By default, this format uses inodes (identical nodes) when possible.
4188    We search for consecutive rows with the same nonzero structure, thereby
4189    reusing matrix information to achieve increased efficiency.
4190 
4191    Options Database Keys:
4192 +  -mat_no_inode  - Do not use inodes
4193 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4194 -  -mat_aij_oneindex - Internally use indexing starting at 1
4195         rather than 0.  Note that when calling MatSetValues(),
4196         the user still MUST index entries starting at 0!
4197 
4198 
4199    Example usage:
4200 
4201    Consider the following 8x8 matrix with 34 non-zero values, that is
4202    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4203    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4204    as follows
4205 
4206 .vb
4207             1  2  0  |  0  3  0  |  0  4
4208     Proc0   0  5  6  |  7  0  0  |  8  0
4209             9  0 10  | 11  0  0  | 12  0
4210     -------------------------------------
4211            13  0 14  | 15 16 17  |  0  0
4212     Proc1   0 18  0  | 19 20 21  |  0  0
4213             0  0  0  | 22 23  0  | 24  0
4214     -------------------------------------
4215     Proc2  25 26 27  |  0  0 28  | 29  0
4216            30  0  0  | 31 32 33  |  0 34
4217 .ve
4218 
4219    This can be represented as a collection of submatrices as
4220 
4221 .vb
4222       A B C
4223       D E F
4224       G H I
4225 .ve
4226 
4227    Where the submatrices A,B,C are owned by proc0, D,E,F are
4228    owned by proc1, G,H,I are owned by proc2.
4229 
4230    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4231    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4232    The 'M','N' parameters are 8,8, and have the same values on all procs.
4233 
4234    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4235    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4236    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4237    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4238    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4239    matrix, and [DF] as another SeqAIJ matrix.
4240 
4241    When d_nz, o_nz parameters are specified, d_nz storage elements are
4242    allocated for every row of the local diagonal submatrix, and o_nz
4243    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4244    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
4245    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4246    In this case, the values of d_nz,o_nz are
4247 .vb
4248      proc0 : dnz = 2, o_nz = 2
4249      proc1 : dnz = 3, o_nz = 2
4250      proc2 : dnz = 1, o_nz = 4
4251 .ve
4252    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4253    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4254    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4255    34 values.
4256 
4257    When d_nnz, o_nnz parameters are specified, the storage is specified
4258    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4259    In the above case the values for d_nnz,o_nnz are
4260 .vb
4261      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4262      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4263      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4264 .ve
4265    Here the space allocated is the sum of all the above values, i.e. 34, and
4266    hence the preallocation is exact.
4267 
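   As a minimal sketch, proc0 in the example above might create the matrix as
   follows (error checking omitted; the other processes pass their own local
   sizes and preallocation arrays):

.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};   /* proc0 values from above */

     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
     /* ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd(), MatDestroy(&A) ... */
.ve
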
4268    Level: intermediate
4269 
4270 .keywords: matrix, aij, compressed row, sparse, parallel
4271 
4272 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4273           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4274 @*/
4275 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4276 {
4277   PetscErrorCode ierr;
4278   PetscMPIInt    size;
4279 
4280   PetscFunctionBegin;
4281   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4282   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4283   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4284   if (size > 1) {
4285     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4286     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4287   } else {
4288     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4289     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4290   }
4291   PetscFunctionReturn(0);
4292 }
4293 
4294 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4295 {
4296   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4297   PetscBool      flg;
4298   PetscErrorCode ierr;
4299 
4300   PetscFunctionBegin;
4301   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
4302   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4303   if (Ad)     *Ad     = a->A;
4304   if (Ao)     *Ao     = a->B;
4305   if (colmap) *colmap = a->garray;
4306   PetscFunctionReturn(0);
4307 }
4308 
4309 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4310 {
4311   PetscErrorCode ierr;
4312   PetscInt       m,N,i,rstart,nnz,Ii;
4313   PetscInt       *indx;
4314   PetscScalar    *values;
4315 
4316   PetscFunctionBegin;
4317   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4318   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4319     PetscInt       *dnz,*onz,sum,bs,cbs;
4320 
4321     if (n == PETSC_DECIDE) {
4322       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4323     }
4324     /* Check sum(n) = N */
4325     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4326     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4327 
4328     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4329     rstart -= m;
4330 
4331     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4332     for (i=0; i<m; i++) {
4333       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4334       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4335       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4336     }
4337 
4338     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4339     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4340     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4341     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4342     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4343     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4344     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4345     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4346   }
4347 
4348   /* numeric phase */
4349   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4350   for (i=0; i<m; i++) {
4351     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4352     Ii   = i + rstart;
4353     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4354     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4355   }
4356   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4357   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4358   PetscFunctionReturn(0);
4359 }
4360 
4361 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4362 {
4363   PetscErrorCode    ierr;
4364   PetscMPIInt       rank;
4365   PetscInt          m,N,i,rstart,nnz;
4366   size_t            len;
4367   const PetscInt    *indx;
4368   PetscViewer       out;
4369   char              *name;
4370   Mat               B;
4371   const PetscScalar *values;
4372 
4373   PetscFunctionBegin;
4374   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4375   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4376   /* Should this be the type of the diagonal block of A? */
4377   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4378   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4379   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4380   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4381   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4382   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4383   for (i=0; i<m; i++) {
4384     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4385     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4386     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4387   }
4388   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4389   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4390 
4391   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4392   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4393   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4394   sprintf(name,"%s.%d",outfile,rank);
4395   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4396   ierr = PetscFree(name);CHKERRQ(ierr);
4397   ierr = MatView(B,out);CHKERRQ(ierr);
4398   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4399   ierr = MatDestroy(&B);CHKERRQ(ierr);
4400   PetscFunctionReturn(0);
4401 }
4402 
4403 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4404 {
4405   PetscErrorCode      ierr;
4406   Mat_Merge_SeqsToMPI *merge;
4407   PetscContainer      container;
4408 
4409   PetscFunctionBegin;
4410   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4411   if (container) {
4412     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4413     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4414     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4415     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4416     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4417     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4418     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4419     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4420     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4421     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4422     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4423     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4424     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4425     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4426     ierr = PetscFree(merge);CHKERRQ(ierr);
4427     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4428   }
4429   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4430   PetscFunctionReturn(0);
4431 }
4432 
4433 #include <../src/mat/utils/freespace.h>
4434 #include <petscbt.h>
4435 
4436 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4437 {
4438   PetscErrorCode      ierr;
4439   MPI_Comm            comm;
4440   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4441   PetscMPIInt         size,rank,taga,*len_s;
4442   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4443   PetscInt            proc,m;
4444   PetscInt            **buf_ri,**buf_rj;
4445   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4446   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4447   MPI_Request         *s_waits,*r_waits;
4448   MPI_Status          *status;
4449   MatScalar           *aa=a->a;
4450   MatScalar           **abuf_r,*ba_i;
4451   Mat_Merge_SeqsToMPI *merge;
4452   PetscContainer      container;
4453 
4454   PetscFunctionBegin;
4455   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4456   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4457 
4458   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4459   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4460 
4461   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4462   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4463 
4464   bi     = merge->bi;
4465   bj     = merge->bj;
4466   buf_ri = merge->buf_ri;
4467   buf_rj = merge->buf_rj;
4468 
4469   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4470   owners = merge->rowmap->range;
4471   len_s  = merge->len_s;
4472 
4473   /* send and recv matrix values */
4474   /*-----------------------------*/
4475   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4476   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4477 
4478   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4479   for (proc=0,k=0; proc<size; proc++) {
4480     if (!len_s[proc]) continue;
4481     i    = owners[proc];
4482     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4483     k++;
4484   }
4485 
4486   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4487   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4488   ierr = PetscFree(status);CHKERRQ(ierr);
4489 
4490   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4491   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4492 
4493   /* insert mat values of mpimat */
4494   /*----------------------------*/
4495   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4496   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4497 
4498   for (k=0; k<merge->nrecv; k++) {
4499     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4500     nrows       = *(buf_ri_k[k]);
4501     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4502     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4503   }
4504 
4505   /* set values of ba */
4506   m = merge->rowmap->n;
4507   for (i=0; i<m; i++) {
4508     arow = owners[rank] + i;
4509     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4510     bnzi = bi[i+1] - bi[i];
4511     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4512 
4513     /* add local non-zero vals of this proc's seqmat into ba */
4514     anzi   = ai[arow+1] - ai[arow];
4515     aj     = a->j + ai[arow];
4516     aa     = a->a + ai[arow];
4517     nextaj = 0;
4518     for (j=0; nextaj<anzi; j++) {
4519       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4520         ba_i[j] += aa[nextaj++];
4521       }
4522     }
4523 
4524     /* add received vals into ba */
4525     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4526       /* i-th row */
4527       if (i == *nextrow[k]) {
4528         anzi   = *(nextai[k]+1) - *nextai[k];
4529         aj     = buf_rj[k] + *(nextai[k]);
4530         aa     = abuf_r[k] + *(nextai[k]);
4531         nextaj = 0;
4532         for (j=0; nextaj<anzi; j++) {
4533           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4534             ba_i[j] += aa[nextaj++];
4535           }
4536         }
4537         nextrow[k]++; nextai[k]++;
4538       }
4539     }
4540     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4541   }
4542   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4543   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4544 
4545   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4546   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4547   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4548   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4549   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4550   PetscFunctionReturn(0);
4551 }
4552 
4553 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4554 {
4555   PetscErrorCode      ierr;
4556   Mat                 B_mpi;
4557   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4558   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4559   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4560   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4561   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4562   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4563   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4564   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4565   MPI_Status          *status;
4566   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4567   PetscBT             lnkbt;
4568   Mat_Merge_SeqsToMPI *merge;
4569   PetscContainer      container;
4570 
4571   PetscFunctionBegin;
4572   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4573 
4574   /* make sure it is a PETSc comm */
4575   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4576   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4577   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4578 
4579   ierr = PetscNew(&merge);CHKERRQ(ierr);
4580   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4581 
4582   /* determine row ownership */
4583   /*---------------------------------------------------------*/
4584   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4585   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4586   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4587   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4588   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4589   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4590   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4591 
4592   m      = merge->rowmap->n;
4593   owners = merge->rowmap->range;
4594 
4595   /* determine the number of messages to send, their lengths */
4596   /*---------------------------------------------------------*/
4597   len_s = merge->len_s;
4598 
4599   len          = 0; /* length of buf_si[] */
4600   merge->nsend = 0;
4601   for (proc=0; proc<size; proc++) {
4602     len_si[proc] = 0;
4603     if (proc == rank) {
4604       len_s[proc] = 0;
4605     } else {
4606       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4607       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4608     }
4609     if (len_s[proc]) {
4610       merge->nsend++;
4611       nrows = 0;
4612       for (i=owners[proc]; i<owners[proc+1]; i++) {
4613         if (ai[i+1] > ai[i]) nrows++;
4614       }
4615       len_si[proc] = 2*(nrows+1);
4616       len         += len_si[proc];
4617     }
4618   }
4619 
4620   /* determine the number and length of messages to receive for ij-structure */
4621   /*-------------------------------------------------------------------------*/
4622   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4623   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4624 
4625   /* post the Irecv of j-structure */
4626   /*-------------------------------*/
4627   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4628   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4629 
4630   /* post the Isend of j-structure */
4631   /*--------------------------------*/
4632   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4633 
4634   for (proc=0, k=0; proc<size; proc++) {
4635     if (!len_s[proc]) continue;
4636     i    = owners[proc];
4637     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4638     k++;
4639   }
4640 
4641   /* receives and sends of j-structure are complete */
4642   /*------------------------------------------------*/
4643   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4644   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4645 
4646   /* send and recv i-structure */
4647   /*---------------------------*/
4648   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4649   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4650 
4651   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4652   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4653   for (proc=0,k=0; proc<size; proc++) {
4654     if (!len_s[proc]) continue;
4655     /* form outgoing message for i-structure:
4656          buf_si[0]:                 nrows to be sent
4657                [1:nrows]:           row index (global)
4658                [nrows+1:2*nrows+1]: i-structure index
4659     */
4660     /*-------------------------------------------*/
4661     nrows       = len_si[proc]/2 - 1;
4662     buf_si_i    = buf_si + nrows+1;
4663     buf_si[0]   = nrows;
4664     buf_si_i[0] = 0;
4665     nrows       = 0;
4666     for (i=owners[proc]; i<owners[proc+1]; i++) {
4667       anzi = ai[i+1] - ai[i];
4668       if (anzi) {
4669         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4670         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4671         nrows++;
4672       }
4673     }
4674     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4675     k++;
4676     buf_si += len_si[proc];
4677   }
4678 
4679   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4680   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4681 
4682   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4683   for (i=0; i<merge->nrecv; i++) {
4684     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4685   }
4686 
4687   ierr = PetscFree(len_si);CHKERRQ(ierr);
4688   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4689   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4690   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4691   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4692   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4693   ierr = PetscFree(status);CHKERRQ(ierr);
4694 
4695   /* compute a local seq matrix in each processor */
4696   /*----------------------------------------------*/
4697   /* allocate bi array and free space for accumulating nonzero column info */
4698   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4699   bi[0] = 0;
4700 
4701   /* create and initialize a linked list */
4702   nlnk = N+1;
4703   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4704 
4705   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4706   len  = ai[owners[rank+1]] - ai[owners[rank]];
4707   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4708 
4709   current_space = free_space;
4710 
4711   /* determine symbolic info for each local row */
4712   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4713 
4714   for (k=0; k<merge->nrecv; k++) {
4715     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4716     nrows       = *buf_ri_k[k];
4717     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4718     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4719   }
4720 
4721   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4722   len  = 0;
4723   for (i=0; i<m; i++) {
4724     bnzi = 0;
4725     /* add local non-zero cols of this proc's seqmat into lnk */
4726     arow  = owners[rank] + i;
4727     anzi  = ai[arow+1] - ai[arow];
4728     aj    = a->j + ai[arow];
4729     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4730     bnzi += nlnk;
4731     /* add received col data into lnk */
4732     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4733       if (i == *nextrow[k]) { /* i-th row */
4734         anzi  = *(nextai[k]+1) - *nextai[k];
4735         aj    = buf_rj[k] + *nextai[k];
4736         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4737         bnzi += nlnk;
4738         nextrow[k]++; nextai[k]++;
4739       }
4740     }
4741     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4742 
4743     /* if free space is not available, make more free space */
4744     if (current_space->local_remaining<bnzi) {
4745       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4746       nspacedouble++;
4747     }
4748     /* copy data into free space, then initialize lnk */
4749     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4750     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4751 
4752     current_space->array           += bnzi;
4753     current_space->local_used      += bnzi;
4754     current_space->local_remaining -= bnzi;
4755 
4756     bi[i+1] = bi[i] + bnzi;
4757   }
4758 
4759   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4760 
4761   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4762   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4763   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4764 
4765   /* create symbolic parallel matrix B_mpi */
4766   /*---------------------------------------*/
4767   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4768   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4769   if (n==PETSC_DECIDE) {
4770     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4771   } else {
4772     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4773   }
4774   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4775   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4776   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4777   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4778   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4779 
4780   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4781   B_mpi->assembled    = PETSC_FALSE;
4782   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4783   merge->bi           = bi;
4784   merge->bj           = bj;
4785   merge->buf_ri       = buf_ri;
4786   merge->buf_rj       = buf_rj;
4787   merge->coi          = NULL;
4788   merge->coj          = NULL;
4789   merge->owners_co    = NULL;
4790 
4791   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4792 
4793   /* attach the supporting struct to B_mpi for reuse */
4794   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4795   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4796   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4797   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4798   *mpimat = B_mpi;
4799 
4800   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4801   PetscFunctionReturn(0);
4802 }
4803 
4804 /*@C
4805       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4806                  matrices from each processor
4807 
4808     Collective on MPI_Comm
4809 
4810    Input Parameters:
4811 +    comm - the communicator the parallel matrix will live on
4812 .    seqmat - the input sequential matrix
4813 .    m - number of local rows (or PETSC_DECIDE)
4814 .    n - number of local columns (or PETSC_DECIDE)
4815 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4816 
4817    Output Parameter:
4818 .    mpimat - the parallel matrix generated
4819 
4820     Level: advanced
4821 
4822    Notes:
4823      The dimensions of the sequential matrix in each processor MUST be the same.
4824      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4825      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
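
   A minimal usage sketch (seqmat is assumed to be an already assembled SEQAIJ
   matrix of the same size on every process):

.vb
     Mat C;
     MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
     /* after changing the numerical values (same nonzero pattern) in seqmat: */
     MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);
.ve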
4826 @*/
4827 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4828 {
4829   PetscErrorCode ierr;
4830   PetscMPIInt    size;
4831 
4832   PetscFunctionBegin;
4833   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4834   if (size == 1) {
4835     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4836     if (scall == MAT_INITIAL_MATRIX) {
4837       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4838     } else {
4839       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4840     }
4841     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4842     PetscFunctionReturn(0);
4843   }
4844   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4845   if (scall == MAT_INITIAL_MATRIX) {
4846     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4847   }
4848   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4849   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4850   PetscFunctionReturn(0);
4851 }
4852 
4853 /*@
4854      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4855           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4856           with MatGetSize().
4857 
4858     Not Collective
4859 
4860    Input Parameters:
4861 +    A - the matrix
4862 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4863 
4864    Output Parameter:
4865 .    A_loc - the local sequential matrix generated
4866 
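   Notes:
   A typical usage sketch (A is assumed to be an assembled MATMPIAIJ matrix;
   the caller destroys A_loc when done):

.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     /* ... use the local rows of A ... */
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);   /* refresh after A's values change */
     MatDestroy(&A_loc);
.ve
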
4867     Level: developer
4868 
4869 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4870 
4871 @*/
4872 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4873 {
4874   PetscErrorCode ierr;
4875   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4876   Mat_SeqAIJ     *mat,*a,*b;
4877   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4878   MatScalar      *aa,*ba,*cam;
4879   PetscScalar    *ca;
4880   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4881   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4882   PetscBool      match;
4883   MPI_Comm       comm;
4884   PetscMPIInt    size;
4885 
4886   PetscFunctionBegin;
4887   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4888   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4889   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4890   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4891   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4892 
4893   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4894   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4895   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4896   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4897   aa = a->a; ba = b->a;
4898   if (scall == MAT_INITIAL_MATRIX) {
4899     if (size == 1) {
4900       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4901       PetscFunctionReturn(0);
4902     }
4903 
4904     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4905     ci[0] = 0;
4906     for (i=0; i<am; i++) {
4907       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4908     }
4909     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4910     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4911     k    = 0;
4912     for (i=0; i<am; i++) {
4913       ncols_o = bi[i+1] - bi[i];
4914       ncols_d = ai[i+1] - ai[i];
4915       /* off-diagonal portion of A */
4916       for (jo=0; jo<ncols_o; jo++) {
4917         col = cmap[*bj];
4918         if (col >= cstart) break;
4919         cj[k]   = col; bj++;
4920         ca[k++] = *ba++;
4921       }
4922       /* diagonal portion of A */
4923       for (j=0; j<ncols_d; j++) {
4924         cj[k]   = cstart + *aj++;
4925         ca[k++] = *aa++;
4926       }
4927       /* off-diagonal portion of A */
4928       for (j=jo; j<ncols_o; j++) {
4929         cj[k]   = cmap[*bj++];
4930         ca[k++] = *ba++;
4931       }
4932     }
4933     /* put together the new matrix */
4934     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4935     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4936     /* Since these are PETSc arrays, change flags to free them as necessary. */
4937     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4938     mat->free_a  = PETSC_TRUE;
4939     mat->free_ij = PETSC_TRUE;
4940     mat->nonew   = 0;
4941   } else if (scall == MAT_REUSE_MATRIX) {
4942     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4943     ci = mat->i; cj = mat->j; cam = mat->a;
4944     for (i=0; i<am; i++) {
4945       /* off-diagonal portion of A */
4946       ncols_o = bi[i+1] - bi[i];
4947       for (jo=0; jo<ncols_o; jo++) {
4948         col = cmap[*bj];
4949         if (col >= cstart) break;
4950         *cam++ = *ba++; bj++;
4951       }
4952       /* diagonal portion of A */
4953       ncols_d = ai[i+1] - ai[i];
4954       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4955       /* off-diagonal portion of A */
4956       for (j=jo; j<ncols_o; j++) {
4957         *cam++ = *ba++; bj++;
4958       }
4959     }
4960   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4961   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4962   PetscFunctionReturn(0);
4963 }
4964 
4965 /*@C
4966      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
4967 
4968     Not Collective
4969 
4970    Input Parameters:
4971 +    A - the matrix
4972 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4973 -    row, col - index sets of rows and columns to extract (or NULL)
4974 
4975    Output Parameter:
4976 .    A_loc - the local sequential matrix generated
4977 
4978     Level: developer
4979 
4980 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4981 
4982 @*/
4983 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4984 {
4985   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4986   PetscErrorCode ierr;
4987   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4988   IS             isrowa,iscola;
4989   Mat            *aloc;
4990   PetscBool      match;
4991 
4992   PetscFunctionBegin;
4993   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4994   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4995   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4996   if (!row) {
4997     start = A->rmap->rstart; end = A->rmap->rend;
4998     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4999   } else {
5000     isrowa = *row;
5001   }
5002   if (!col) {
5003     start = A->cmap->rstart;
5004     cmap  = a->garray;
5005     nzA   = a->A->cmap->n;
5006     nzB   = a->B->cmap->n;
5007     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5008     ncols = 0;
5009     for (i=0; i<nzB; i++) {
5010       if (cmap[i] < start) idx[ncols++] = cmap[i];
5011       else break;
5012     }
5013     imark = i;
5014     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5015     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5016     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5017   } else {
5018     iscola = *col;
5019   }
5020   if (scall != MAT_INITIAL_MATRIX) {
5021     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5022     aloc[0] = *A_loc;
5023   }
5024   ierr   = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5025   *A_loc = aloc[0];
5026   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5027   if (!row) {
5028     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5029   }
5030   if (!col) {
5031     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5032   }
5033   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5034   PetscFunctionReturn(0);
5035 }
5036 
5037 /*@C
5038     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5039 
5040     Collective on Mat
5041 
5042    Input Parameters:
5043 +    A,B - the matrices in mpiaij format
5044 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5045 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5046 
5047    Output Parameter:
5048 +    rowb, colb - index sets of rows and columns of B to extract
5049 -    B_seq - the sequential matrix generated
5050 
5051     Level: developer
5052 
5053 @*/
5054 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5055 {
5056   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5057   PetscErrorCode ierr;
5058   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5059   IS             isrowb,iscolb;
5060   Mat            *bseq=NULL;
5061 
5062   PetscFunctionBegin;
5063   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5064     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5065   }
5066   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5067 
5068   if (scall == MAT_INITIAL_MATRIX) {
5069     start = A->cmap->rstart;
5070     cmap  = a->garray;
5071     nzA   = a->A->cmap->n;
5072     nzB   = a->B->cmap->n;
5073     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5074     ncols = 0;
5075     for (i=0; i<nzB; i++) {  /* row < local row index */
5076       if (cmap[i] < start) idx[ncols++] = cmap[i];
5077       else break;
5078     }
5079     imark = i;
5080     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5081     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5082     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5083     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5084   } else {
5085     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5086     isrowb  = *rowb; iscolb = *colb;
5087     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5088     bseq[0] = *B_seq;
5089   }
5090   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5091   *B_seq = bseq[0];
5092   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5093   if (!rowb) {
5094     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5095   } else {
5096     *rowb = isrowb;
5097   }
5098   if (!colb) {
5099     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5100   } else {
5101     *colb = iscolb;
5102   }
5103   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5104   PetscFunctionReturn(0);
5105 }
5106 
5107 /*
5108     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5109     of the OFF-DIAGONAL portion of local A
5110 
5111     Collective on Mat
5112 
5113    Input Parameters:
5114 +    A,B - the matrices in mpiaij format
5115 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5116 
5117    Output Parameters:
5118 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5119 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5120 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5121 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5122 
5123     Level: developer
5124 
5125 */
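/*
   Editorial calling-pattern sketch (not taken from an actual PETSc caller): this routine is
   normally driven by the parallel matrix-matrix multiply kernels.  A caller that wants the
   MAT_REUSE_MATRIX path keeps the startsj_s, startsj_r and bufa arrays returned by the first
   call and passes them back unchanged once only the numerical values of B have changed:

      PetscErrorCode ierr;
      PetscInt       *startsj_s = NULL,*startsj_r = NULL;
      MatScalar      *bufa = NULL;
      Mat            B_oth = NULL;

      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);

      ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
      ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
*/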
5126 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5127 {
5128   VecScatter_MPI_General *gen_to,*gen_from;
5129   PetscErrorCode         ierr;
5130   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5131   Mat_SeqAIJ             *b_oth;
5132   VecScatter             ctx;
5133   MPI_Comm               comm;
5134   PetscMPIInt            *rprocs,*sprocs,tag,rank;
5135   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5136   PetscInt               *rvalues,*svalues;
5137   MatScalar              *b_otha,*bufa,*bufA;
5138   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5139   MPI_Request            *rwaits = NULL,*swaits = NULL;
5140   MPI_Status             *sstatus,rstatus;
5141   PetscMPIInt            jj,size;
5142   PetscInt               *cols,sbs,rbs;
5143   PetscScalar            *vals;
5144 
5145   PetscFunctionBegin;
5146   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5147   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5148 
5149   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5150     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5151   }
5152   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5153   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5154 
5155   if (size == 1) {
5156     startsj_s = NULL;
5157     bufa_ptr  = NULL;
5158     *B_oth    = NULL;
5159     PetscFunctionReturn(0);
5160   }
5161 
5162   if (!a->Mvctx_mpi1) { /* create a->Mvctx_mpi1 to be used for Mat-Mat ops */
5163     a->Mvctx_mpi1_flg = PETSC_TRUE;
5164     ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr);
5165   }
5166   ctx = a->Mvctx_mpi1;
5167   tag = ((PetscObject)ctx)->tag;
5168 
5169   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5170   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5171   nrecvs   = gen_from->n;
5172   nsends   = gen_to->n;
5173 
5174   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5175   srow    = gen_to->indices;    /* local row index to be sent */
5176   sstarts = gen_to->starts;
5177   sprocs  = gen_to->procs;
5178   sstatus = gen_to->sstatus;
5179   sbs     = gen_to->bs;
5180   rstarts = gen_from->starts;
5181   rprocs  = gen_from->procs;
5182   rbs     = gen_from->bs;
5183 
5184   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5185   if (scall == MAT_INITIAL_MATRIX) {
5186     /* i-array */
5187     /*---------*/
5188     /*  post receives */
5189     ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
5190     for (i=0; i<nrecvs; i++) {
5191       rowlen = rvalues + rstarts[i]*rbs;
5192       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5193       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5194     }
5195 
5196     /* pack the outgoing message */
5197     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5198 
5199     sstartsj[0] = 0;
5200     rstartsj[0] = 0;
5201     len         = 0; /* total length of j or a array to be sent */
5202     k           = 0;
5203     ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
5204     for (i=0; i<nsends; i++) {
5205       rowlen = svalues + sstarts[i]*sbs;
5206       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5207       for (j=0; j<nrows; j++) {
5208         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5209         for (l=0; l<sbs; l++) {
5210           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5211 
5212           rowlen[j*sbs+l] = ncols;
5213 
5214           len += ncols;
5215           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5216         }
5217         k++;
5218       }
5219       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5220 
5221       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5222     }
5223     /* recvs and sends of i-array are completed */
5224     i = nrecvs;
5225     while (i--) {
5226       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5227     }
5228     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5229     ierr = PetscFree(svalues);CHKERRQ(ierr);
5230 
5231     /* allocate buffers for sending j and a arrays */
5232     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5233     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5234 
5235     /* create i-array of B_oth */
5236     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5237 
5238     b_othi[0] = 0;
5239     len       = 0; /* total length of j or a array to be received */
5240     k         = 0;
5241     for (i=0; i<nrecvs; i++) {
5242       rowlen = rvalues + rstarts[i]*rbs;
5243       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5244       for (j=0; j<nrows; j++) {
5245         b_othi[k+1] = b_othi[k] + rowlen[j];
5246         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5247         k++;
5248       }
5249       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5250     }
5251     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5252 
5253     /* allocate space for j and a arrays of B_oth */
5254     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5255     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5256 
5257     /* j-array */
5258     /*---------*/
5259     /*  post receives of j-array */
5260     for (i=0; i<nrecvs; i++) {
5261       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5262       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5263     }
5264 
5265     /* pack the outgoing message j-array */
5266     k = 0;
5267     for (i=0; i<nsends; i++) {
5268       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5269       bufJ  = bufj+sstartsj[i];
5270       for (j=0; j<nrows; j++) {
5271         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5272         for (ll=0; ll<sbs; ll++) {
5273           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5274           for (l=0; l<ncols; l++) {
5275             *bufJ++ = cols[l];
5276           }
5277           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5278         }
5279       }
5280       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5281     }
5282 
5283     /* recvs and sends of j-array are completed */
5284     i = nrecvs;
5285     while (i--) {
5286       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5287     }
5288     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5289   } else if (scall == MAT_REUSE_MATRIX) {
5290     sstartsj = *startsj_s;
5291     rstartsj = *startsj_r;
5292     bufa     = *bufa_ptr;
5293     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5294     b_otha   = b_oth->a;
5295   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Unsupported MatReuse value; must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5296 
5297   /* a-array */
5298   /*---------*/
5299   /*  post receives of a-array */
5300   for (i=0; i<nrecvs; i++) {
5301     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5302     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5303   }
5304 
5305   /* pack the outgoing message a-array */
5306   k = 0;
5307   for (i=0; i<nsends; i++) {
5308     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5309     bufA  = bufa+sstartsj[i];
5310     for (j=0; j<nrows; j++) {
5311       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5312       for (ll=0; ll<sbs; ll++) {
5313         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5314         for (l=0; l<ncols; l++) {
5315           *bufA++ = vals[l];
5316         }
5317         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5318       }
5319     }
5320     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5321   }
5322   /* recvs and sends of a-array are completed */
5323   i = nrecvs;
5324   while (i--) {
5325     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5326   }
5327   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5328   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5329 
5330   if (scall == MAT_INITIAL_MATRIX) {
5331     /* put together the new matrix */
5332     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5333 
5334     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5335     /* Since these are PETSc arrays, change flags to free them as necessary. */
5336     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5337     b_oth->free_a  = PETSC_TRUE;
5338     b_oth->free_ij = PETSC_TRUE;
5339     b_oth->nonew   = 0;
5340 
5341     ierr = PetscFree(bufj);CHKERRQ(ierr);
5342     if (!startsj_s || !bufa_ptr) {
5343       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5344       ierr = PetscFree(bufa);CHKERRQ(ierr); /* bufa is not returned to the caller, so release it here */
5345     } else {
5346       *startsj_s = sstartsj;
5347       *startsj_r = rstartsj;
5348       *bufa_ptr  = bufa;
5349     }
5350   }
5351   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5352   PetscFunctionReturn(0);
5353 }
5354 
5355 /*@C
5356   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5357 
5358   Not Collective
5359 
5360   Input Parameter:
5361 . A - The matrix in mpiaij format
5362 
5363   Output Parameters:
5364 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5365 . colmap - A map from global column index to local index into lvec
5366 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5367 
5368   Level: developer
5369 
5370 @*/
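/*
   Editorial example (not part of the original source): query the communication structures of an
   assembled MATMPIAIJ matrix A and view the scatter.  The type of colmap depends on whether PETSc
   was configured with ctables, so the declaration mirrors the prototype below:

      PetscErrorCode ierr;
      Vec            lvec;
      VecScatter     Mvctx;
   #if defined(PETSC_USE_CTABLE)
      PetscTable     colmap;
   #else
      PetscInt       *colmap;
   #endif

      ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
      ierr = VecScatterView(Mvctx,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/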
5371 #if defined(PETSC_USE_CTABLE)
5372 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5373 #else
5374 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5375 #endif
5376 {
5377   Mat_MPIAIJ *a;
5378 
5379   PetscFunctionBegin;
5380   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5381   PetscValidPointer(lvec, 2);
5382   PetscValidPointer(colmap, 3);
5383   PetscValidPointer(multScatter, 4);
5384   a = (Mat_MPIAIJ*) A->data;
5385   if (lvec) *lvec = a->lvec;
5386   if (colmap) *colmap = a->colmap;
5387   if (multScatter) *multScatter = a->Mvctx;
5388   PetscFunctionReturn(0);
5389 }
5390 
5391 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5392 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5393 #if defined(PETSC_HAVE_MKL_SPARSE)
5394 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5395 #endif
5396 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5397 #if defined(PETSC_HAVE_ELEMENTAL)
5398 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5399 #endif
5400 #if defined(PETSC_HAVE_HYPRE)
5401 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5402 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5403 #endif
5404 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*);
5405 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5406 
5407 /*
5408     Computes C = A*B as (B'*A')', since computing the MPIDense*MPIAIJ product A*B directly is untenable
5409 
5410                n                       p                          p
5411         (              )       (              )         (                  )
5412       m (      A       )  *  n (       B      )   =   m (         C        )
5413         (              )       (              )         (                  )
5414 
5415 */
5416 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5417 {
5418   PetscErrorCode ierr;
5419   Mat            At,Bt,Ct;
5420 
5421   PetscFunctionBegin;
5422   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5423   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5424   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5425   ierr = MatDestroy(&At);CHKERRQ(ierr);
5426   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5427   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5428   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5429   PetscFunctionReturn(0);
5430 }
5431 
5432 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5433 {
5434   PetscErrorCode ierr;
5435   PetscInt       m=A->rmap->n,n=B->cmap->n;
5436   Mat            Cmat;
5437 
5438   PetscFunctionBegin;
5439   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5440   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5441   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5442   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5443   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5444   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5445   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5446   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5447 
5448   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5449 
5450   *C = Cmat;
5451   PetscFunctionReturn(0);
5452 }
5453 
5454 /* ----------------------------------------------------------------*/
5455 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5456 {
5457   PetscErrorCode ierr;
5458 
5459   PetscFunctionBegin;
5460   if (scall == MAT_INITIAL_MATRIX) {
5461     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5462     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5463     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5464   }
5465   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5466   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5467   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5468   PetscFunctionReturn(0);
5469 }
5470 
5471 /*MC
5472    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5473 
5474    Options Database Keys:
5475 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5476 
5477   Level: beginner
5478 
5479 .seealso: MatCreateAIJ()
5480 M*/
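/*
   Editorial example (not part of the original source; M and N stand for global sizes chosen by
   the application): create a parallel AIJ matrix, allow -mat_type to override the type from the
   options database, and preallocate the diagonal and off-diagonal blocks:

      Mat            A;
      PetscErrorCode ierr;

      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/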
5481 
5482 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5483 {
5484   Mat_MPIAIJ     *b;
5485   PetscErrorCode ierr;
5486   PetscMPIInt    size;
5487 
5488   PetscFunctionBegin;
5489   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5490 
5491   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5492   B->data       = (void*)b;
5493   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5494   B->assembled  = PETSC_FALSE;
5495   B->insertmode = NOT_SET_VALUES;
5496   b->size       = size;
5497 
5498   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5499 
5500   /* build cache for off array entries formed */
5501   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5502 
5503   b->donotstash  = PETSC_FALSE;
5504   b->colmap      = 0;
5505   b->garray      = 0;
5506   b->roworiented = PETSC_TRUE;
5507 
5508   /* stuff used for matrix vector multiply */
5509   b->lvec  = NULL;
5510   b->Mvctx = NULL;
5511 
5512   /* stuff for MatGetRow() */
5513   b->rowindices   = 0;
5514   b->rowvalues    = 0;
5515   b->getrowactive = PETSC_FALSE;
5516 
5517   /* flexible pointer used in CUSP/CUSPARSE classes */
5518   b->spptr = NULL;
5519 
5520   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5521   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5522   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5523   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5524   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5525   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5526   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5527   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5528   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5529 #if defined(PETSC_HAVE_MKL_SPARSE)
5530   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5531 #endif
5532   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5533   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5534 #if defined(PETSC_HAVE_ELEMENTAL)
5535   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5536 #endif
5537 #if defined(PETSC_HAVE_HYPRE)
5538   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5539 #endif
5540   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr);
5541   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5542   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5543   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5544   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5545 #if defined(PETSC_HAVE_HYPRE)
5546   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5547 #endif
5548   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5549   PetscFunctionReturn(0);
5550 }
5551 
5552 /*@C
5553      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5554          and "off-diagonal" parts of the matrix in CSR format.
5555 
5556    Collective on MPI_Comm
5557 
5558    Input Parameters:
5559 +  comm - MPI communicator
5560 .  m - number of local rows (Cannot be PETSC_DECIDE)
5561 .  n - This value should be the same as the local size used in creating the
5562        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have it
5563        calculated if N is given) For square matrices n is almost always m.
5564 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5565 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5566 .   i - row indices for "diagonal" portion of matrix
5567 .   j - column indices
5568 .   a - matrix values
5569 .   oi - row indices for "off-diagonal" portion of matrix
5570 .   oj - column indices
5571 -   oa - matrix values
5572 
5573    Output Parameter:
5574 .   mat - the matrix
5575 
5576    Level: advanced
5577 
5578    Notes:
5579        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5580        must free the arrays once the matrix has been destroyed and not before.
5581 
5582        The i and j indices are 0 based
5583 
5584        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5585 
5586        This sets local rows and cannot be used to set off-processor values.
5587 
5588        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5589        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5590        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5591        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5592        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5593        communication if it is known that only local entries will be set.
5594 
5595 .keywords: matrix, aij, compressed row, sparse, parallel
5596 
5597 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5598           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5599 @*/
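/*
   Editorial usage sketch (not part of the original source; m, n and the six arrays i, j, a, oi,
   oj, oa are assumed to have been assembled by the application in the split CSR layout described
   above, using 0-based indices).  The arrays stay owned by the caller and may be freed only after
   the matrix has been destroyed:

      Mat            A;
      PetscErrorCode ierr;

      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                            i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
      ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
      ierr = MatDestroy(&A);CHKERRQ(ierr);
*/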
5600 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5601 {
5602   PetscErrorCode ierr;
5603   Mat_MPIAIJ     *maij;
5604 
5605   PetscFunctionBegin;
5606   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5607   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5608   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5609   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5610   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5611   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5612   maij = (Mat_MPIAIJ*) (*mat)->data;
5613 
5614   (*mat)->preallocated = PETSC_TRUE;
5615 
5616   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5617   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5618 
5619   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5620   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5621 
5622   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5623   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5624   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5625   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5626 
5627   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5628   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5629   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5630   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5631   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5632   PetscFunctionReturn(0);
5633 }
5634 
5635 /*
5636     Special version for direct calls from Fortran
5637 */
5638 #include <petsc/private/fortranimpl.h>
5639 
5640 /* Change these macros so they can be used in a void function */
5641 #undef CHKERRQ
5642 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5643 #undef SETERRQ2
5644 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5645 #undef SETERRQ3
5646 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5647 #undef SETERRQ
5648 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5649 
5650 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5651 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5652 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5653 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5654 #else
5655 #endif
5656 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5657 {
5658   Mat            mat  = *mmat;
5659   PetscInt       m    = *mm, n = *mn;
5660   InsertMode     addv = *maddv;
5661   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5662   PetscScalar    value;
5663   PetscErrorCode ierr;
5664 
5665   MatCheckPreallocated(mat,1);
5666   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5667 
5668 #if defined(PETSC_USE_DEBUG)
5669   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5670 #endif
5671   {
5672     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5673     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5674     PetscBool roworiented = aij->roworiented;
5675 
5676     /* Some Variables required in the macro */
5677     Mat        A                 = aij->A;
5678     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5679     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5680     MatScalar  *aa               = a->a;
5681     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5682     Mat        B                 = aij->B;
5683     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5684     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5685     MatScalar  *ba               = b->a;
5686 
5687     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5688     PetscInt  nonew = a->nonew;
5689     MatScalar *ap1,*ap2;
5690 
5691     PetscFunctionBegin;
5692     for (i=0; i<m; i++) {
5693       if (im[i] < 0) continue;
5694 #if defined(PETSC_USE_DEBUG)
5695       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5696 #endif
5697       if (im[i] >= rstart && im[i] < rend) {
5698         row      = im[i] - rstart;
5699         lastcol1 = -1;
5700         rp1      = aj + ai[row];
5701         ap1      = aa + ai[row];
5702         rmax1    = aimax[row];
5703         nrow1    = ailen[row];
5704         low1     = 0;
5705         high1    = nrow1;
5706         lastcol2 = -1;
5707         rp2      = bj + bi[row];
5708         ap2      = ba + bi[row];
5709         rmax2    = bimax[row];
5710         nrow2    = bilen[row];
5711         low2     = 0;
5712         high2    = nrow2;
5713 
5714         for (j=0; j<n; j++) {
5715           if (roworiented) value = v[i*n+j];
5716           else value = v[i+j*m];
5717           if (in[j] >= cstart && in[j] < cend) {
5718             col = in[j] - cstart;
5719             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5720             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5721           } else if (in[j] < 0) continue;
5722 #if defined(PETSC_USE_DEBUG)
5723           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5724 #endif
5725           else {
5726             if (mat->was_assembled) {
5727               if (!aij->colmap) {
5728                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5729               }
5730 #if defined(PETSC_USE_CTABLE)
5731               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5732               col--;
5733 #else
5734               col = aij->colmap[in[j]] - 1;
5735 #endif
5736               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5737               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5738                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5739                 col  =  in[j];
5740                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5741                 B     = aij->B;
5742                 b     = (Mat_SeqAIJ*)B->data;
5743                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5744                 rp2   = bj + bi[row];
5745                 ap2   = ba + bi[row];
5746                 rmax2 = bimax[row];
5747                 nrow2 = bilen[row];
5748                 low2  = 0;
5749                 high2 = nrow2;
5750                 bm    = aij->B->rmap->n;
5751                 ba    = b->a;
5752               }
5753             } else col = in[j];
5754             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5755           }
5756         }
5757       } else if (!aij->donotstash) {
5758         if (roworiented) {
5759           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5760         } else {
5761           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5762         }
5763       }
5764     }
5765   }
5766   PetscFunctionReturnVoid();
5767 }
5768 
5769