#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes:
    Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, and MATAIJCRL; the type
    also automatically switches over to using inodes when enough of them exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/
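
/*
   Usage sketch (editor's addition, not part of the original source): creating an AIJ
   matrix and calling both preallocation routines as recommended above; whichever
   routine does not match the communicator is ignored.  M, N, d_nnz, and o_nnz are
   application-supplied placeholders; error checking is elided for brevity.

     Mat A;

     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
     MatSetType(A,MATAIJ);
     MatSeqAIJSetPreallocation(A,0,d_nnz);          // used when the communicator has one process
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);  // used for multiple processes
*/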

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
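
/*
   Usage sketch (editor's addition): MatFindNonzeroRows() returns an IS of the locally
   owned rows that contain at least one nonzero value; as the code above shows, the
   output is left NULL when no process has an all-zero row.

     IS keptrows;

     ierr = MatFindNonzeroRows(A,&keptrows);CHKERRQ(ierr);
     if (keptrows) {                                       // NULL means no rows were dropped
       ierr = ISView(keptrows,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
       ierr = ISDestroy(&keptrows);CHKERRQ(ierr);
     }
*/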

PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)Y->data;

  PetscFunctionBegin;
  if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*)aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]),work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
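
/*
   Usage sketch (editor's addition): as the reduction above shows, the norms are
   computed over the global columns, so the caller supplies an array of length N
   (the global column count) and every process receives the full result.

     PetscInt  N;
     PetscReal *norms;

     ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc1(N,&norms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
     ierr = PetscFree(norms);CHKERRQ(ierr);
*/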

PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  IS             sis,gis;
  PetscErrorCode ierr;
  const PetscInt *isis,*igis;
  PetscInt       n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices.

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=NULL;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processes */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal nonzeros per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal nonzeros per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over the numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer the numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
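
/*
   Usage sketch (editor's addition): distributing a sequential matrix whose entries
   live on rank 0.  From the code above, gmat must be a valid matrix on every rank
   (its block sizes are queried everywhere) but only rank 0's entries are used, the
   matrix must be square, and the local row counts m must sum to the global size M,
   which PetscSplitOwnership() arranges here.  gmat and M are assumed supplied.

     Mat      Adist;
     PetscInt m = PETSC_DECIDE;

     ierr = PetscSplitOwnership(PETSC_COMM_WORLD,&m,&M);CHKERRQ(ierr);
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_INITIAL_MATRIX,&Adist);CHKERRQ(ierr);
*/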

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable,
  at a slightly higher hash-table cost; without it, it is not scalable (each
  process has an order-N integer array) but it is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
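
/*
   Lookup sketch (editor's addition): translating a global column number gcol to its
   local position in the off-diagonal block via the colmap built above, mirroring the
   pattern used in MatSetValues_MPIAIJ() below.  The map stores indices shifted by
   one so that zero can mean "absent"; after the decrement, col == -1 means the
   column is not present in the off-diagonal block.

   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&col);CHKERRQ(ierr);
     col--;
   #else
     col = aij->colmap[gcol] - 1;
   #endif
*/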

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol1)  low1 = 0; \
    else                 high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
  }

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else             low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
  }

PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some variables required by the macros above */
  Mat        A                 = aij->A;
  Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa               = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B                 = aij->B;
  Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba               = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) {
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                ierr = PetscInfo3(mat,"Skipping insertion of a new nonzero location in the off-diagonal portion of the matrix: %g at (%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
              } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
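
/*
   Usage sketch (editor's addition): entries destined for locally owned rows go
   straight into the A (diagonal) or B (off-diagonal) block above; entries for other
   processes' rows are stashed and communicated during assembly, so a matching
   assembly pair is required before the matrix can be used.  row, col, and A are
   assumed supplied; row and col are global indices.

     PetscScalar v = 1.0;

     ierr = MatSetValues(A,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/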

PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative rows are ignored */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative columns are ignored */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive values belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any process has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no process disassembled, so we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = NULL;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  PetscInt       *lrows;
  PetscInt       r,len;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }
  /* Must zero mat->B before mat->A because the (diag) case below may put values into mat->B */
  ierr = MatZeroRows(mat->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  if (A->congruentlayouts == -1) { /* first time we compare row and column layouts */
    PetscBool cong;
    ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
    if (cong) A->congruentlayouts = 1;
    else      A->congruentlayouts = 0;
  }
  if ((diag != 0.0) && A->congruentlayouts) {
    ierr = MatZeroRows(mat->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(mat->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
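
/*
   Usage sketch (editor's addition): each process passes the global numbers of the
   rows it wants zeroed (they need not be locally owned; the map-local step above
   routes them); when x and b are supplied, b is adjusted so the solution keeps x's
   values in the zeroed rows.  A, x, and b are assumed supplied; the row numbers
   here are hypothetical.

     PetscInt rows[] = {0,5};

     ierr = MatZeroRows(A,2,rows,1.0,x,b);CHKERRQ(ierr);
*/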

PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj,*ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n,&lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N,&rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf,n,N,NULL,PETSC_OWN_POINTER,rrows,PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf,MPIU_INT,(PetscInt*)rows,lrows,MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_INT,(PetscInt*)rows,lrows,MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off-diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off-diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);

  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
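
/*
   Note (editor's addition): the multiply above is split as y = A_d x_local +
   A_o x_ghost; the scatter that gathers the ghost values of x into a->lvec is
   started before the local product and finished after it, overlapping
   communication with computation.  A minimal driver, assuming A is an assembled
   MPIAIJ matrix:

     Vec x,y;

     ierr = MatCreateVecs(A,&x,&y);CHKERRQ(ierr);
     ierr = VecSet(x,1.0);CHKERRQ(ierr);
     ierr = MatMult(A,x,y);CHKERRQ(ierr);
*/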

PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the VecScatterEnd() below */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* the values were actually received in the Begin(), but we still need to call this no-op End() */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)Amat->data,*Bij;
  Mat            Adia = Aij->A,Bdia,Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*)Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block.
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1149 
1150 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1151 {
1152   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1153   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1154   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1155   PetscErrorCode ierr;
1156   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1157   int            fd;
1158   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1159   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1160   PetscScalar    *column_values;
1161   PetscInt       message_count,flowcontrolcount;
1162   FILE           *file;
1163 
1164   PetscFunctionBegin;
1165   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1166   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1167   nz   = A->nz + B->nz;
1168   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1169   if (!rank) {
1170     header[0] = MAT_FILE_CLASSID;
1171     header[1] = mat->rmap->N;
1172     header[2] = mat->cmap->N;
1173 
1174     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1175     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1176     /* get largest number of rows any processor has */
1177     rlen  = mat->rmap->n;
1178     range = mat->rmap->range;
1179     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1180   } else {
1181     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1182     rlen = mat->rmap->n;
1183   }
1184 
1185   /* load up the local row counts */
1186   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1187   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1188 
1189   /* store the row lengths to the file */
1190   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1191   if (!rank) {
1192     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1193     for (i=1; i<size; i++) {
1194       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1195       rlen = range[i+1] - range[i];
1196       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1197       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1198     }
1199     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1200   } else {
1201     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1202     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1203     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1204   }
1205   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1206 
1207   /* load up the local column indices */
1208   nzmax = nz; /* th processor needs space a largest processor needs */
1209   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1210   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1211   cnt   = 0;
1212   for (i=0; i<mat->rmap->n; i++) {
1213     for (j=B->i[i]; j<B->i[i+1]; j++) {
1214       if ((col = garray[B->j[j]]) > cstart) break;
1215       column_indices[cnt++] = col;
1216     }
1217     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1218     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1219   }
1220   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1221 
1222   /* store the column indices to the file */
1223   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1224   if (!rank) {
1225     MPI_Status status;
1226     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1227     for (i=1; i<size; i++) {
1228       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1229       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1230       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1231       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1232       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1233     }
1234     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1235   } else {
1236     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1237     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1238     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1239     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1240   }
1241   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1242 
1243   /* load up the local column values */
1244   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1245   cnt  = 0;
1246   for (i=0; i<mat->rmap->n; i++) {
1247     for (j=B->i[i]; j<B->i[i+1]; j++) {
1248       if (garray[B->j[j]] > cstart) break;
1249       column_values[cnt++] = B->a[j];
1250     }
1251     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1252     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1253   }
1254   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1255 
1256   /* store the column values to the file */
1257   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1258   if (!rank) {
1259     MPI_Status status;
1260     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1261     for (i=1; i<size; i++) {
1262       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1263       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1265       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1266       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1267     }
1268     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1269   } else {
1270     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1271     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1272     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1273     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1274   }
1275   ierr = PetscFree(column_values);CHKERRQ(ierr);
1276 
1277   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1278   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1279   PetscFunctionReturn(0);
1280 }
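
/*
   Example usage (a minimal sketch): dumping a parallel AIJ matrix A with a
   binary viewer, which invokes the routine above when run on more than one
   process:

     PetscViewer v;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_WRITE,&v);CHKERRQ(ierr);
     ierr = MatView(A,v);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&v);CHKERRQ(ierr);
*/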
1281 
1282 #include <petscdraw.h>
1283 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1284 {
1285   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1286   PetscErrorCode    ierr;
1287   PetscMPIInt       rank = aij->rank,size = aij->size;
1288   PetscBool         isdraw,iascii,isbinary;
1289   PetscViewer       sviewer;
1290   PetscViewerFormat format;
1291 
1292   PetscFunctionBegin;
1293   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1294   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1295   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1296   if (iascii) {
1297     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1298     if (format == PETSC_VIEWER_LOAD_BALANCE) {
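      /* selected with e.g. -mat_view ::load_balance (assuming the usual PetscViewerFormat option string) */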
1299       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1300       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1301       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1302       for (i=0; i<(PetscInt)size; i++) {
1303         nmax = PetscMax(nmax,nz[i]);
1304         nmin = PetscMin(nmin,nz[i]);
1305         navg += nz[i];
1306       }
1307       ierr = PetscFree(nz);CHKERRQ(ierr);
1308       navg = navg/size;
1309       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1310       PetscFunctionReturn(0);
1311     }
1313     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1314       MatInfo   info;
1315       PetscBool inodes;
1316 
1317       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1318       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1319       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1320       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1321       if (!inodes) {
1322         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1323                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1324       } else {
1325         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1326                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1327       }
1328       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1329       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1330       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1331       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1332       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1333       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1334       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1335       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1336       PetscFunctionReturn(0);
1337     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1338       PetscInt inodecount,inodelimit,*inodes;
1339       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1340       if (inodes) {
1341         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1342       } else {
1343         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1344       }
1345       PetscFunctionReturn(0);
1346     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1347       PetscFunctionReturn(0);
1348     }
1349   } else if (isbinary) {
1350     if (size == 1) {
1351       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1352       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1353     } else {
1354       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1355     }
1356     PetscFunctionReturn(0);
1357   } else if (isdraw) {
1358     PetscDraw draw;
1359     PetscBool isnull;
1360     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1361     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1362     if (isnull) PetscFunctionReturn(0);
1363   }
1364 
1365   {
1366     /* assemble the entire matrix onto first processor. */
1367     Mat        A;
1368     Mat_SeqAIJ *Aloc;
1369     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1370     MatScalar  *a;
1371 
1372     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1373     if (!rank) {
1374       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1375     } else {
1376       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1377     }
1378     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1379     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1380     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1381     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1382     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1383 
1384     /* copy over the A part */
1385     Aloc = (Mat_SeqAIJ*)aij->A->data;
1386     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1387     row  = mat->rmap->rstart;
1388     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1389     for (i=0; i<m; i++) {
1390       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1391       row++;
1392       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1393     }
1394     aj = Aloc->j;
1395     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1396 
1397     /* copy over the B part */
1398     Aloc = (Mat_SeqAIJ*)aij->B->data;
1399     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1400     row  = mat->rmap->rstart;
1401     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1402     ct   = cols;
1403     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1404     for (i=0; i<m; i++) {
1405       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1406       row++;
1407       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1408     }
1409     ierr = PetscFree(ct);CHKERRQ(ierr);
1410     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1411     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1412     /*
1413        Everyone has to call to draw the matrix since the graphics waits are
1414        synchronized across all processors that share the PetscDraw object
1415     */
1416     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1417     if (!rank) {
1418       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1419       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1420     }
1421     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1422     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1423     ierr = MatDestroy(&A);CHKERRQ(ierr);
1424   }
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1429 {
1430   PetscErrorCode ierr;
1431   PetscBool      iascii,isdraw,issocket,isbinary;
1432 
1433   PetscFunctionBegin;
1434   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1435   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1436   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1437   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1438   if (iascii || isdraw || isbinary || issocket) {
1439     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1440   }
1441   PetscFunctionReturn(0);
1442 }
1443 
1444 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1445 {
1446   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1447   PetscErrorCode ierr;
1448   Vec            bb1 = 0;
1449   PetscBool      hasop;
1450 
1451   PetscFunctionBegin;
1452   if (flag == SOR_APPLY_UPPER) {
1453     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1454     PetscFunctionReturn(0);
1455   }
1456 
  /* bb1 is needed whenever more than one iteration runs, the initial guess is nonzero, or Eisenstat's trick is used */
  if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1458     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1459   }
1460 
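  /*
     Each parallel iteration below refreshes the ghost values of xx, moves the
     off-process coupling to the right-hand side as bb1 = bb - B*x, and then
     applies lits local SOR sweeps to the diagonal block A: process-block
     relaxation with Jacobi-like coupling between processes.
  */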
1461   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1462     if (flag & SOR_ZERO_INITIAL_GUESS) {
1463       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1464       its--;
1465     }
1466 
1467     while (its--) {
1468       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1469       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1470 
1471       /* update rhs: bb1 = bb - B*x */
1472       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1473       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1474 
1475       /* local sweep */
1476       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1477     }
1478   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1479     if (flag & SOR_ZERO_INITIAL_GUESS) {
1480       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1481       its--;
1482     }
1483     while (its--) {
1484       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1485       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1486 
1487       /* update rhs: bb1 = bb - B*x */
1488       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1489       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1490 
1491       /* local sweep */
1492       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1493     }
1494   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1495     if (flag & SOR_ZERO_INITIAL_GUESS) {
1496       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1497       its--;
1498     }
1499     while (its--) {
1500       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1501       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1502 
1503       /* update rhs: bb1 = bb - B*x */
1504       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1505       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1506 
1507       /* local sweep */
1508       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1509     }
1510   } else if (flag & SOR_EISENSTAT) {
1511     Vec xx1;
1512 
1513     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1514     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1515 
1516     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1517     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1518     if (!mat->diag) {
1519       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1520       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1521     }
1522     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1523     if (hasop) {
1524       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1525     } else {
1526       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1527     }
1528     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1529 
1530     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1531 
1532     /* local sweep */
1533     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1534     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1535     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1536   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1537 
1538   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1539 
1540   matin->factorerrortype = mat->A->factorerrortype;
1541   PetscFunctionReturn(0);
1542 }
1543 
1544 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1545 {
1546   Mat            aA,aB,Aperm;
1547   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1548   PetscScalar    *aa,*ba;
1549   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1550   PetscSF        rowsf,sf;
1551   IS             parcolp = NULL;
1552   PetscBool      done;
1553   PetscErrorCode ierr;
1554 
1555   PetscFunctionBegin;
1556   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1557   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1558   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1559   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1560 
1561   /* Invert row permutation to find out where my rows should go */
1562   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1563   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1564   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1565   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1566   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1567   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
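  /* after the reduce, rdest[i] is the global row of the permuted matrix that
     receives local row i; cdest, computed below, plays the same role for columns */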
1568 
1569   /* Invert column permutation to find out where my columns should go */
1570   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1571   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1572   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1573   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1574   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1575   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1576   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1577 
1578   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1579   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1580   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1581 
1582   /* Find out where my gcols should go */
1583   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1584   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1585   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1586   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1587   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1588   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1589   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1590   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1591 
1592   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1593   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1594   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1595   for (i=0; i<m; i++) {
1596     PetscInt row = rdest[i],rowner;
1597     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1598     for (j=ai[i]; j<ai[i+1]; j++) {
1599       PetscInt cowner,col = cdest[aj[j]];
1600       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1601       if (rowner == cowner) dnnz[i]++;
1602       else onnz[i]++;
1603     }
1604     for (j=bi[i]; j<bi[i+1]; j++) {
1605       PetscInt cowner,col = gcdest[bj[j]];
1606       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1607       if (rowner == cowner) dnnz[i]++;
1608       else onnz[i]++;
1609     }
1610   }
1611   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1612   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1613   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1614   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1615   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1616 
1617   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1618   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1619   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1620   for (i=0; i<m; i++) {
1621     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1622     PetscInt j0,rowlen;
1623     rowlen = ai[i+1] - ai[i];
1624     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1625       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1626       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1627     }
1628     rowlen = bi[i+1] - bi[i];
1629     for (j0=j=0; j<rowlen; j0=j) {
1630       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1631       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1632     }
1633   }
1634   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1635   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1636   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1637   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1638   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1639   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1640   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1641   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1642   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1643   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1644   *B = Aperm;
1645   PetscFunctionReturn(0);
1646 }
1647 
1648 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1649 {
1650   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1651   PetscErrorCode ierr;
1652 
1653   PetscFunctionBegin;
1654   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1655   if (ghosts) *ghosts = aij->garray;
1656   PetscFunctionReturn(0);
1657 }
1658 
1659 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1660 {
1661   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1662   Mat            A    = mat->A,B = mat->B;
1663   PetscErrorCode ierr;
1664   PetscReal      isend[5],irecv[5];
1665 
1666   PetscFunctionBegin;
1667   info->block_size = 1.0;
1668   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1669 
1670   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1671   isend[3] = info->memory;  isend[4] = info->mallocs;
1672 
1673   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1674 
1675   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1676   isend[3] += info->memory;  isend[4] += info->mallocs;
1677   if (flag == MAT_LOCAL) {
1678     info->nz_used      = isend[0];
1679     info->nz_allocated = isend[1];
1680     info->nz_unneeded  = isend[2];
1681     info->memory       = isend[3];
1682     info->mallocs      = isend[4];
1683   } else if (flag == MAT_GLOBAL_MAX) {
1684     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1685 
1686     info->nz_used      = irecv[0];
1687     info->nz_allocated = irecv[1];
1688     info->nz_unneeded  = irecv[2];
1689     info->memory       = irecv[3];
1690     info->mallocs      = irecv[4];
1691   } else if (flag == MAT_GLOBAL_SUM) {
1692     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1693 
1694     info->nz_used      = irecv[0];
1695     info->nz_allocated = irecv[1];
1696     info->nz_unneeded  = irecv[2];
1697     info->memory       = irecv[3];
1698     info->mallocs      = irecv[4];
1699   }
1700   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1701   info->fill_ratio_needed = 0;
1702   info->factor_mallocs    = 0;
1703   PetscFunctionReturn(0);
1704 }
1705 
1706 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1707 {
1708   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1709   PetscErrorCode ierr;
1710 
1711   PetscFunctionBegin;
1712   switch (op) {
1713   case MAT_NEW_NONZERO_LOCATIONS:
1714   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1715   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1716   case MAT_KEEP_NONZERO_PATTERN:
1717   case MAT_NEW_NONZERO_LOCATION_ERR:
1718   case MAT_USE_INODES:
1719   case MAT_IGNORE_ZERO_ENTRIES:
1720     MatCheckPreallocated(A,1);
1721     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1722     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1723     break;
1724   case MAT_ROW_ORIENTED:
1725     MatCheckPreallocated(A,1);
1726     a->roworiented = flg;
1727 
1728     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1729     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1730     break;
1731   case MAT_NEW_DIAGONALS:
1732     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1733     break;
1734   case MAT_IGNORE_OFF_PROC_ENTRIES:
1735     a->donotstash = flg;
1736     break;
1737   case MAT_SPD:
1738     A->spd_set = PETSC_TRUE;
1739     A->spd     = flg;
1740     if (flg) {
1741       A->symmetric                  = PETSC_TRUE;
1742       A->structurally_symmetric     = PETSC_TRUE;
1743       A->symmetric_set              = PETSC_TRUE;
1744       A->structurally_symmetric_set = PETSC_TRUE;
1745     }
1746     break;
1747   case MAT_SYMMETRIC:
1748     MatCheckPreallocated(A,1);
1749     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1750     break;
1751   case MAT_STRUCTURALLY_SYMMETRIC:
1752     MatCheckPreallocated(A,1);
1753     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1754     break;
1755   case MAT_HERMITIAN:
1756     MatCheckPreallocated(A,1);
1757     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1758     break;
1759   case MAT_SYMMETRY_ETERNAL:
1760     MatCheckPreallocated(A,1);
1761     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1762     break;
1763   case MAT_SUBMAT_SINGLEIS:
1764     A->submat_singleis = flg;
1765     break;
1766   case MAT_STRUCTURE_ONLY:
1767     /* The option is handled directly by MatSetOption() */
1768     break;
1769   default:
1770     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1771   }
1772   PetscFunctionReturn(0);
1773 }
1774 
1775 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1776 {
1777   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1778   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1779   PetscErrorCode ierr;
1780   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1781   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1782   PetscInt       *cmap,*idx_p;
1783 
1784   PetscFunctionBegin;
1785   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1786   mat->getrowactive = PETSC_TRUE;
1787 
1788   if (!mat->rowvalues && (idx || v)) {
1789     /*
1790         allocate enough space to hold information from the longest row.
1791     */
1792     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1793     PetscInt   max = 1,tmp;
1794     for (i=0; i<matin->rmap->n; i++) {
1795       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1796       if (max < tmp) max = tmp;
1797     }
1798     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1799   }
1800 
1801   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1802   lrow = row - rstart;
1803 
1804   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1805   if (!v)   {pvA = 0; pvB = 0;}
1806   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1807   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1808   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1809   nztot = nzA + nzB;
1810 
1811   cmap = mat->garray;
1812   if (v  || idx) {
1813     if (nztot) {
1814       /* Sort by increasing column numbers, assuming A and B already sorted */
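      /* B's entries with global column < cstart come first, then all of A (the
         diagonal block, offset by cstart), then B's remaining entries */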
1815       PetscInt imark = -1;
1816       if (v) {
1817         *v = v_p = mat->rowvalues;
1818         for (i=0; i<nzB; i++) {
1819           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1820           else break;
1821         }
1822         imark = i;
1823         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1824         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1825       }
1826       if (idx) {
1827         *idx = idx_p = mat->rowindices;
1828         if (imark > -1) {
1829           for (i=0; i<imark; i++) {
1830             idx_p[i] = cmap[cworkB[i]];
1831           }
1832         } else {
1833           for (i=0; i<nzB; i++) {
1834             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1835             else break;
1836           }
1837           imark = i;
1838         }
1839         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1840         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1841       }
1842     } else {
1843       if (idx) *idx = 0;
1844       if (v)   *v   = 0;
1845     }
1846   }
1847   *nz  = nztot;
1848   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1849   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1850   PetscFunctionReturn(0);
1851 }
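
/*
   Typical caller pattern for MatGetRow()/MatRestoreRow() (a minimal sketch;
   the row must be owned locally and the arrays must not be used after the
   restore):

     const PetscInt    *cols;
     const PetscScalar *vals;
     PetscInt          ncols;
     ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     ...
     ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
*/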
1852 
1853 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1854 {
1855   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1856 
1857   PetscFunctionBegin;
1858   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1859   aij->getrowactive = PETSC_FALSE;
1860   PetscFunctionReturn(0);
1861 }
1862 
1863 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1864 {
1865   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1866   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1867   PetscErrorCode ierr;
1868   PetscInt       i,j,cstart = mat->cmap->rstart;
1869   PetscReal      sum = 0.0;
1870   MatScalar      *v;
1871 
1872   PetscFunctionBegin;
1873   if (aij->size == 1) {
1874     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1875   } else {
1876     if (type == NORM_FROBENIUS) {
1877       v = amat->a;
1878       for (i=0; i<amat->nz; i++) {
1879         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1880       }
1881       v = bmat->a;
1882       for (i=0; i<bmat->nz; i++) {
1883         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1884       }
1885       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1886       *norm = PetscSqrtReal(*norm);
1887       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1888     } else if (type == NORM_1) { /* max column norm */
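      /* the 1-norm is the max column sum, max_j sum_i |a_ij|: accumulate the
         per-column absolute sums locally, allreduce them, then take the max */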
1889       PetscReal *tmp,*tmp2;
1890       PetscInt  *jj,*garray = aij->garray;
1891       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1892       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1893       *norm = 0.0;
1894       v     = amat->a; jj = amat->j;
1895       for (j=0; j<amat->nz; j++) {
1896         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1897       }
1898       v = bmat->a; jj = bmat->j;
1899       for (j=0; j<bmat->nz; j++) {
1900         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1901       }
1902       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1903       for (j=0; j<mat->cmap->N; j++) {
1904         if (tmp2[j] > *norm) *norm = tmp2[j];
1905       }
1906       ierr = PetscFree(tmp);CHKERRQ(ierr);
1907       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1908       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1909     } else if (type == NORM_INFINITY) { /* max row norm */
1910       PetscReal ntemp = 0.0;
1911       for (j=0; j<aij->A->rmap->n; j++) {
1912         v   = amat->a + amat->i[j];
1913         sum = 0.0;
1914         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1915           sum += PetscAbsScalar(*v); v++;
1916         }
1917         v = bmat->a + bmat->i[j];
1918         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1919           sum += PetscAbsScalar(*v); v++;
1920         }
1921         if (sum > ntemp) ntemp = sum;
1922       }
1923       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1924       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1925     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1926   }
1927   PetscFunctionReturn(0);
1928 }
1929 
1930 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1931 {
1932   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1933   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1934   PetscErrorCode ierr;
1935   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1936   PetscInt       cstart = A->cmap->rstart,ncol;
1937   Mat            B;
1938   MatScalar      *array;
1939 
1940   PetscFunctionBegin;
1941   if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1942 
1943   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1944   ai = Aloc->i; aj = Aloc->j;
1945   bi = Bloc->i; bj = Bloc->j;
1946   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1947     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1948     PetscSFNode          *oloc;
1949     PETSC_UNUSED PetscSF sf;
1950 
1951     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1952     /* compute d_nnz for preallocation */
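    /* a nonzero in local column j of A becomes a nonzero in local row j of the
       transpose, so the counts are accumulated per column */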
1953     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1954     for (i=0; i<ai[ma]; i++) {
1955       d_nnz[aj[i]]++;
1956       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1957     }
1958     /* compute local off-diagonal contributions */
1959     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1960     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1961     /* map those to global */
1962     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1963     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1964     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1965     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1966     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1967     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1968     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1969 
1970     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1971     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1972     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1973     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1974     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1975     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1976   } else {
1977     B    = *matout;
1978     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1979     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1980   }
1981 
1982   /* copy over the A part */
1983   array = Aloc->a;
1984   row   = A->rmap->rstart;
1985   for (i=0; i<ma; i++) {
1986     ncol = ai[i+1]-ai[i];
1987     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1988     row++;
1989     array += ncol; aj += ncol;
1990   }
1991   aj = Aloc->j;
1992   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
1993 
1994   /* copy over the B part */
1995   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
1996   array = Bloc->a;
1997   row   = A->rmap->rstart;
1998   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1999   cols_tmp = cols;
2000   for (i=0; i<mb; i++) {
2001     ncol = bi[i+1]-bi[i];
2002     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2003     row++;
2004     array += ncol; cols_tmp += ncol;
2005   }
2006   ierr = PetscFree(cols);CHKERRQ(ierr);
2007 
2008   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2009   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2010   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2011     *matout = B;
2012   } else {
2013     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2014   }
2015   PetscFunctionReturn(0);
2016 }
2017 
2018 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2019 {
2020   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2021   Mat            a    = aij->A,b = aij->B;
2022   PetscErrorCode ierr;
2023   PetscInt       s1,s2,s3;
2024 
2025   PetscFunctionBegin;
2026   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2027   if (rr) {
2028     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2029     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2030     /* Overlap communication with computation. */
2031     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2032   }
2033   if (ll) {
2034     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2035     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2036     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2037   }
  /* scale the diagonal block */
2039   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2040 
2041   if (rr) {
2042     /* Do a scatter end and then right scale the off-diagonal block */
2043     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2044     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2045   }
2046   PetscFunctionReturn(0);
2047 }
2048 
2049 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2050 {
2051   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2052   PetscErrorCode ierr;
2053 
2054   PetscFunctionBegin;
2055   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2056   PetscFunctionReturn(0);
2057 }
2058 
2059 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2060 {
2061   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2062   Mat            a,b,c,d;
2063   PetscBool      flg;
2064   PetscErrorCode ierr;
2065 
2066   PetscFunctionBegin;
2067   a = matA->A; b = matA->B;
2068   c = matB->A; d = matB->B;
2069 
2070   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2071   if (flg) {
2072     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2073   }
2074   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2075   PetscFunctionReturn(0);
2076 }
2077 
2078 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2079 {
2080   PetscErrorCode ierr;
2081   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2082   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2083 
2084   PetscFunctionBegin;
2085   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2086   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       MatCopy() directly on the two parts. If need be, a copy more efficient than
       MatCopy_Basic() could be provided by first uncompressing the a->B matrices
       and then copying the submatrices */
2092     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2093   } else {
2094     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2095     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2096   }
2097   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2098   PetscFunctionReturn(0);
2099 }
2100 
2101 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2102 {
2103   PetscErrorCode ierr;
2104 
2105   PetscFunctionBegin;
2106   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2107   PetscFunctionReturn(0);
2108 }
2109 
2110 /*
2111    Computes the number of nonzeros per row needed for preallocation when X and Y
2112    have different nonzero structure.
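
   For example, if row i of X has global columns {0,3} and row i of Y has
   global columns {3,5}, the union is {0,3,5} and nnz[i] = 3.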
2113 */
2114 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2115 {
2116   PetscInt       i,j,k,nzx,nzy;
2117 
2118   PetscFunctionBegin;
2119   /* Set the number of nonzeros in the new matrix */
2120   for (i=0; i<m; i++) {
2121     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2122     nzx = xi[i+1] - xi[i];
2123     nzy = yi[i+1] - yi[i];
2124     nnz[i] = 0;
2125     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2126       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2127       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2128       nnz[i]++;
2129     }
2130     for (; k<nzy; k++) nnz[i]++;
2131   }
2132   PetscFunctionReturn(0);
2133 }
2134 
2135 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2136 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2137 {
2138   PetscErrorCode ierr;
2139   PetscInt       m = Y->rmap->N;
2140   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2141   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2142 
2143   PetscFunctionBegin;
2144   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2145   PetscFunctionReturn(0);
2146 }
2147 
2148 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2149 {
2150   PetscErrorCode ierr;
2151   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2152   PetscBLASInt   bnz,one=1;
2153   Mat_SeqAIJ     *x,*y;
2154 
2155   PetscFunctionBegin;
2156   if (str == SAME_NONZERO_PATTERN) {
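    /* identical nonzero patterns mean x->a and y->a line up entry for entry,
       so Y += a*X reduces to one BLAS axpy per (diagonal and off-diagonal) block */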
2157     PetscScalar alpha = a;
2158     x    = (Mat_SeqAIJ*)xx->A->data;
2159     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2160     y    = (Mat_SeqAIJ*)yy->A->data;
2161     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2162     x    = (Mat_SeqAIJ*)xx->B->data;
2163     y    = (Mat_SeqAIJ*)yy->B->data;
2164     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2165     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2166     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2167   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2168     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2169   } else {
2170     Mat      B;
2171     PetscInt *nnz_d,*nnz_o;
2172     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2173     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2174     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2175     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2176     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2177     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2178     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2179     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2180     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2181     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2182     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2183     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2184     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2185     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2186   }
2187   PetscFunctionReturn(0);
2188 }
2189 
2190 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2191 
2192 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2193 {
2194 #if defined(PETSC_USE_COMPLEX)
2195   PetscErrorCode ierr;
2196   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2197 
2198   PetscFunctionBegin;
2199   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2200   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2201 #else
2202   PetscFunctionBegin;
2203 #endif
2204   PetscFunctionReturn(0);
2205 }
2206 
2207 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2208 {
2209   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2210   PetscErrorCode ierr;
2211 
2212   PetscFunctionBegin;
2213   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2214   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2215   PetscFunctionReturn(0);
2216 }
2217 
2218 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2219 {
2220   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2221   PetscErrorCode ierr;
2222 
2223   PetscFunctionBegin;
2224   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2225   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2226   PetscFunctionReturn(0);
2227 }
2228 
2229 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2230 {
2231   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2232   PetscErrorCode ierr;
2233   PetscInt       i,*idxb = 0;
2234   PetscScalar    *va,*vb;
2235   Vec            vtmp;
2236 
2237   PetscFunctionBegin;
2238   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2239   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2240   if (idx) {
2241     for (i=0; i<A->rmap->n; i++) {
2242       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2243     }
2244   }
2245 
2246   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2247   if (idx) {
2248     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2249   }
2250   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2251   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2252 
2253   for (i=0; i<A->rmap->n; i++) {
2254     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2255       va[i] = vb[i];
2256       if (idx) idx[i] = a->garray[idxb[i]];
2257     }
2258   }
2259 
2260   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2261   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2262   ierr = PetscFree(idxb);CHKERRQ(ierr);
2263   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2264   PetscFunctionReturn(0);
2265 }
2266 
2267 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2268 {
2269   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2270   PetscErrorCode ierr;
2271   PetscInt       i,*idxb = 0;
2272   PetscScalar    *va,*vb;
2273   Vec            vtmp;
2274 
2275   PetscFunctionBegin;
2276   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2277   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2278   if (idx) {
    for (i=0; i<A->rmap->n; i++) { /* one entry of v (and idx) per local row */
2280       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2281     }
2282   }
2283 
2284   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2285   if (idx) {
2286     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2287   }
2288   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2289   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2290 
2291   for (i=0; i<A->rmap->n; i++) {
2292     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2293       va[i] = vb[i];
2294       if (idx) idx[i] = a->garray[idxb[i]];
2295     }
2296   }
2297 
2298   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2299   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2300   ierr = PetscFree(idxb);CHKERRQ(ierr);
2301   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2302   PetscFunctionReturn(0);
2303 }
2304 
2305 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2306 {
2307   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2308   PetscInt       n      = A->rmap->n;
2309   PetscInt       cstart = A->cmap->rstart;
2310   PetscInt       *cmap  = mat->garray;
2311   PetscInt       *diagIdx, *offdiagIdx;
2312   Vec            diagV, offdiagV;
2313   PetscScalar    *a, *diagA, *offdiagA;
2314   PetscInt       r;
2315   PetscErrorCode ierr;
2316 
2317   PetscFunctionBegin;
2318   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2321   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2322   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2323   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2324   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2325   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2326   for (r = 0; r < n; ++r) {
2327     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2328       a[r]   = diagA[r];
2329       idx[r] = cstart + diagIdx[r];
2330     } else {
2331       a[r]   = offdiagA[r];
2332       idx[r] = cmap[offdiagIdx[r]];
2333     }
2334   }
2335   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2336   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2337   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2338   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2339   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2340   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2341   PetscFunctionReturn(0);
2342 }
2343 
2344 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2345 {
2346   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2347   PetscInt       n      = A->rmap->n;
2348   PetscInt       cstart = A->cmap->rstart;
2349   PetscInt       *cmap  = mat->garray;
2350   PetscInt       *diagIdx, *offdiagIdx;
2351   Vec            diagV, offdiagV;
2352   PetscScalar    *a, *diagA, *offdiagA;
2353   PetscInt       r;
2354   PetscErrorCode ierr;
2355 
2356   PetscFunctionBegin;
2357   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2358   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2359   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2360   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2361   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2362   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2363   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2364   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2365   for (r = 0; r < n; ++r) {
2366     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2367       a[r]   = diagA[r];
2368       idx[r] = cstart + diagIdx[r];
2369     } else {
2370       a[r]   = offdiagA[r];
2371       idx[r] = cmap[offdiagIdx[r]];
2372     }
2373   }
2374   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2375   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2376   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2377   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2378   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2379   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2380   PetscFunctionReturn(0);
2381 }
2382 
2383 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2384 {
2385   PetscErrorCode ierr;
2386   Mat            *dummy;
2387 
2388   PetscFunctionBegin;
2389   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2390   *newmat = *dummy;
2391   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2392   PetscFunctionReturn(0);
2393 }
2394 
2395 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2396 {
2397   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2398   PetscErrorCode ierr;
2399 
2400   PetscFunctionBegin;
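  /* the block diagonal of the parallel matrix lies entirely in the local diagonal part a->A */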
2401   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2402   A->factorerrortype = a->A->factorerrortype;
2403   PetscFunctionReturn(0);
2404 }
2405 
2406 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2407 {
2408   PetscErrorCode ierr;
2409   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2410 
2411   PetscFunctionBegin;
2412   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2413   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2414   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2415   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2416   PetscFunctionReturn(0);
2417 }
2418 
2419 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2420 {
2421   PetscFunctionBegin;
2422   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2423   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2424   PetscFunctionReturn(0);
2425 }
2426 
2427 /*@
   MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap
2429 
2430    Collective on Mat
2431 
2432    Input Parameters:
2433 +    A - the matrix
2434 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
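
   Options Database Keys:
. -mat_increase_overlap_scalable - use the scalable algorithm to compute the overlap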
2435 
  Level: advanced
2437 
2438 @*/
2439 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2440 {
2441   PetscErrorCode       ierr;
2442 
2443   PetscFunctionBegin;
2444   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2445   PetscFunctionReturn(0);
2446 }
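
/*
   Example usage (a minimal sketch; assumes A is an assembled MATMPIAIJ matrix
   and nis, is, ov are valid arguments for MatIncreaseOverlap()):

     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatIncreaseOverlap(A,nis,is,ov);CHKERRQ(ierr);
*/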
2447 
2448 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2449 {
2450   PetscErrorCode       ierr;
2451   PetscBool            sc = PETSC_FALSE,flg;
2452 
2453   PetscFunctionBegin;
2454   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
  }
  ierr = PetscOptionsTail();CHKERRQ(ierr);
2462   PetscFunctionReturn(0);
2463 }
2464 
2465 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2466 {
2467   PetscErrorCode ierr;
2468   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2469   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2470 
2471   PetscFunctionBegin;
2472   if (!Y->preallocated) {
2473     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2474   } else if (!aij->nz) {
2475     PetscInt nonew = aij->nonew;
2476     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2477     aij->nonew = nonew;
2478   }
2479   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2480   PetscFunctionReturn(0);
2481 }
2482 
2483 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2484 {
2485   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2486   PetscErrorCode ierr;
2487 
2488   PetscFunctionBegin;
2489   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2490   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
  if (d) {
    PetscInt rstart;
    ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
    *d += rstart; /* convert the local index from the diagonal block into a global row index */
  }
2497   PetscFunctionReturn(0);
2498 }
2499 
2500 
2501 /* -------------------------------------------------------------------*/
2502 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2503                                        MatGetRow_MPIAIJ,
2504                                        MatRestoreRow_MPIAIJ,
2505                                        MatMult_MPIAIJ,
2506                                 /* 4*/ MatMultAdd_MPIAIJ,
2507                                        MatMultTranspose_MPIAIJ,
2508                                        MatMultTransposeAdd_MPIAIJ,
2509                                        0,
2510                                        0,
2511                                        0,
2512                                 /*10*/ 0,
2513                                        0,
2514                                        0,
2515                                        MatSOR_MPIAIJ,
2516                                        MatTranspose_MPIAIJ,
2517                                 /*15*/ MatGetInfo_MPIAIJ,
2518                                        MatEqual_MPIAIJ,
2519                                        MatGetDiagonal_MPIAIJ,
2520                                        MatDiagonalScale_MPIAIJ,
2521                                        MatNorm_MPIAIJ,
2522                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2523                                        MatAssemblyEnd_MPIAIJ,
2524                                        MatSetOption_MPIAIJ,
2525                                        MatZeroEntries_MPIAIJ,
2526                                 /*24*/ MatZeroRows_MPIAIJ,
2527                                        0,
2528                                        0,
2529                                        0,
2530                                        0,
2531                                 /*29*/ MatSetUp_MPIAIJ,
2532                                        0,
2533                                        0,
2534                                        MatGetDiagonalBlock_MPIAIJ,
2535                                        0,
2536                                 /*34*/ MatDuplicate_MPIAIJ,
2537                                        0,
2538                                        0,
2539                                        0,
2540                                        0,
2541                                 /*39*/ MatAXPY_MPIAIJ,
2542                                        MatCreateSubMatrices_MPIAIJ,
2543                                        MatIncreaseOverlap_MPIAIJ,
2544                                        MatGetValues_MPIAIJ,
2545                                        MatCopy_MPIAIJ,
2546                                 /*44*/ MatGetRowMax_MPIAIJ,
2547                                        MatScale_MPIAIJ,
2548                                        MatShift_MPIAIJ,
2549                                        MatDiagonalSet_MPIAIJ,
2550                                        MatZeroRowsColumns_MPIAIJ,
2551                                 /*49*/ MatSetRandom_MPIAIJ,
2552                                        0,
2553                                        0,
2554                                        0,
2555                                        0,
2556                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2557                                        0,
2558                                        MatSetUnfactored_MPIAIJ,
2559                                        MatPermute_MPIAIJ,
2560                                        0,
2561                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2562                                        MatDestroy_MPIAIJ,
2563                                        MatView_MPIAIJ,
2564                                        0,
2565                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2566                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2567                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2568                                        0,
2569                                        0,
2570                                        0,
2571                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2572                                        MatGetRowMinAbs_MPIAIJ,
2573                                        0,
2574                                        0,
2575                                        0,
2576                                        0,
2577                                 /*75*/ MatFDColoringApply_AIJ,
2578                                        MatSetFromOptions_MPIAIJ,
2579                                        0,
2580                                        0,
2581                                        MatFindZeroDiagonals_MPIAIJ,
2582                                 /*80*/ 0,
2583                                        0,
2584                                        0,
2585                                 /*83*/ MatLoad_MPIAIJ,
2586                                        MatIsSymmetric_MPIAIJ,
2587                                        0,
2588                                        0,
2589                                        0,
2590                                        0,
2591                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2592                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2593                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2594                                        MatPtAP_MPIAIJ_MPIAIJ,
2595                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2596                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2597                                        0,
2598                                        0,
2599                                        0,
2600                                        0,
2601                                 /*99*/ 0,
2602                                        0,
2603                                        0,
2604                                        MatConjugate_MPIAIJ,
2605                                        0,
2606                                 /*104*/MatSetValuesRow_MPIAIJ,
2607                                        MatRealPart_MPIAIJ,
2608                                        MatImaginaryPart_MPIAIJ,
2609                                        0,
2610                                        0,
2611                                 /*109*/0,
2612                                        0,
2613                                        MatGetRowMin_MPIAIJ,
2614                                        0,
2615                                        MatMissingDiagonal_MPIAIJ,
2616                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2617                                        0,
2618                                        MatGetGhosts_MPIAIJ,
2619                                        0,
2620                                        0,
2621                                 /*119*/0,
2622                                        0,
2623                                        0,
2624                                        0,
2625                                        MatGetMultiProcBlock_MPIAIJ,
2626                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2627                                        MatGetColumnNorms_MPIAIJ,
2628                                        MatInvertBlockDiagonal_MPIAIJ,
2629                                        0,
2630                                        MatCreateSubMatricesMPI_MPIAIJ,
2631                                 /*129*/0,
2632                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2633                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2634                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2635                                        0,
2636                                 /*134*/0,
2637                                        0,
2638                                        MatRARt_MPIAIJ_MPIAIJ,
2639                                        0,
2640                                        0,
2641                                 /*139*/MatSetBlockSizes_MPIAIJ,
2642                                        0,
2643                                        0,
2644                                        MatFDColoringSetUp_MPIXAIJ,
2645                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2646                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2647 };
2648 
2649 /* ----------------------------------------------------------------------------------------*/
2650 
2651 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2652 {
2653   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2654   PetscErrorCode ierr;
2655 
2656   PetscFunctionBegin;
2657   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2658   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2659   PetscFunctionReturn(0);
2660 }
2661 
2662 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2663 {
2664   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2665   PetscErrorCode ierr;
2666 
2667   PetscFunctionBegin;
2668   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2669   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2670   PetscFunctionReturn(0);
2671 }
2672 
2673 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2674 {
2675   Mat_MPIAIJ     *b;
2676   PetscErrorCode ierr;
2677 
2678   PetscFunctionBegin;
2679   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2680   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2681   b = (Mat_MPIAIJ*)B->data;
2682 
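  /* Discard any stale column map, ghost-column array, local work vector, and scatter context; they are rebuilt at the next assembly */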
2683 #if defined(PETSC_USE_CTABLE)
2684   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2685 #else
2686   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2687 #endif
2688   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2689   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2690   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2691 
2692   /* Because B will have been resized we simply destroy it and create a new one each time */
2693   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2694   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2695   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2696   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2697   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2698   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2699 
2700   if (!B->preallocated) {
2701     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2702     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2703     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2704     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2705     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2706   }
2707 
2708   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2709   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2710   B->preallocated  = PETSC_TRUE;
2711   B->was_assembled = PETSC_FALSE;
2712   B->assembled     = PETSC_FALSE;
2713   PetscFunctionReturn(0);
2714 }
2715 
2716 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2717 {
2718   Mat_MPIAIJ     *b;
2719   PetscErrorCode ierr;
2720 
2721   PetscFunctionBegin;
2722   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2723   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2724   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2725   b = (Mat_MPIAIJ*)B->data;
2726 
2727 #if defined(PETSC_USE_CTABLE)
2728   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2729 #else
2730   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2731 #endif
2732   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2733   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2734   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2735 
2736   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2737   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2738   B->preallocated  = PETSC_TRUE;
2739   B->was_assembled = PETSC_FALSE;
2740   B->assembled = PETSC_FALSE;
2741   PetscFunctionReturn(0);
2742 }
2743 
2744 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2745 {
2746   Mat            mat;
2747   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2748   PetscErrorCode ierr;
2749 
2750   PetscFunctionBegin;
2751   *newmat = 0;
2752   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2753   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2754   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2755   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2756   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2757   a       = (Mat_MPIAIJ*)mat->data;
2758 
2759   mat->factortype   = matin->factortype;
2760   mat->assembled    = PETSC_TRUE;
2761   mat->insertmode   = NOT_SET_VALUES;
2762   mat->preallocated = PETSC_TRUE;
2763 
2764   a->size         = oldmat->size;
2765   a->rank         = oldmat->rank;
2766   a->donotstash   = oldmat->donotstash;
2767   a->roworiented  = oldmat->roworiented;
2768   a->rowindices   = 0;
2769   a->rowvalues    = 0;
2770   a->getrowactive = PETSC_FALSE;
2771 
2772   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2773   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2774 
2775   if (oldmat->colmap) {
2776 #if defined(PETSC_USE_CTABLE)
2777     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2778 #else
2779     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2780     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2781     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2782 #endif
2783   } else a->colmap = 0;
2784   if (oldmat->garray) {
2785     PetscInt len;
2786     len  = oldmat->B->cmap->n;
2787     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2788     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2789     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2790   } else a->garray = 0;
2791 
2792   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2793   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2794   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2795   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2796 
2797   if (oldmat->Mvctx_mpi1) {
2798     ierr    = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2799     ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2800   }
2801 
2802   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2803   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2804   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2805   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2806   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2807   *newmat = mat;
2808   PetscFunctionReturn(0);
2809 }
2810 
2811 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2812 {
2813   PetscScalar    *vals,*svals;
2814   MPI_Comm       comm;
2815   PetscErrorCode ierr;
2816   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2817   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2818   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2819   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2820   PetscInt       cend,cstart,n,*rowners;
2821   int            fd;
2822   PetscInt       bs = newMat->rmap->bs;
2823 
2824   PetscFunctionBegin;
2825   /* force binary viewer to load .info file if it has not yet done so */
2826   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2827   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2828   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2829   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2830   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2831   if (!rank) {
2832     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2833     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2833     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
2834     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2835   }
2836 
2837   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2838   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2839   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2840   if (bs < 0) bs = 1;
2841 
2842   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2843   M    = header[1]; N = header[2];
2844 
2845   /* If global sizes are set, check if they are consistent with that given in the file */
2846   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,newMat->rmap->N);
2847   if (newMat->cmap->N >= 0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,newMat->cmap->N);
2848 
2849   /* determine ownership of all (block) rows */
2850   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows (%D) and block size (%D)",M,bs);
2851   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2852   else m = newMat->rmap->n; /* Set by user */
2853 
2854   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2855   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2856 
2857   /* First process needs enough room for process with most rows */
2858   if (!rank) {
2859     mmax = rowners[1];
2860     for (i=2; i<=size; i++) {
2861       mmax = PetscMax(mmax, rowners[i]);
2862     }
2863   } else mmax = -1;             /* unused, but compilers complain */
2864 
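  /* Turn the gathered per-process row counts into an exclusive prefix sum: [rowners[rank], rowners[rank+1]) is this process's row range */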
2865   rowners[0] = 0;
2866   for (i=2; i<=size; i++) {
2867     rowners[i] += rowners[i-1];
2868   }
2869   rstart = rowners[rank];
2870   rend   = rowners[rank+1];
2871 
2872   /* distribute row lengths to all processors */
2873   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2874   if (!rank) {
2875     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2876     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2877     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2878     for (j=0; j<m; j++) {
2879       procsnz[0] += ourlens[j];
2880     }
2881     for (i=1; i<size; i++) {
2882       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2883       /* calculate the number of nonzeros on each processor */
2884       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2885         procsnz[i] += rowlengths[j];
2886       }
2887       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2888     }
2889     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2890   } else {
2891     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2892   }
2893 
2894   if (!rank) {
2895     /* determine max buffer needed and allocate it */
2896     maxnz = 0;
2897     for (i=0; i<size; i++) {
2898       maxnz = PetscMax(maxnz,procsnz[i]);
2899     }
2900     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2901 
2902     /* read in my part of the matrix column indices  */
2903     nz   = procsnz[0];
2904     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2905     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2906 
2907     /* read in everyone else's part and ship it off */
2908     for (i=1; i<size; i++) {
2909       nz   = procsnz[i];
2910       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2911       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2912     }
2913     ierr = PetscFree(cols);CHKERRQ(ierr);
2914   } else {
2915     /* determine buffer space needed for message */
2916     nz = 0;
2917     for (i=0; i<m; i++) {
2918       nz += ourlens[i];
2919     }
2920     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2921 
2922     /* receive message of column indices*/
2923     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2924   }
2925 
2926   /* determine column ownership if matrix is not square */
2927   if (N != M) {
2928     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2929     else n = newMat->cmap->n;
2930     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2931     cstart = cend - n;
2932   } else {
2933     cstart = rstart;
2934     cend   = rend;
2935     n      = cend - cstart;
2936   }
2937 
2938   /* loop over local rows, determining number of off diagonal entries */
2939   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2940   jj   = 0;
2941   for (i=0; i<m; i++) {
2942     for (j=0; j<ourlens[i]; j++) {
2943       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2944       jj++;
2945     }
2946   }
2947 
2948   for (i=0; i<m; i++) {
2949     ourlens[i] -= offlens[i];
2950   }
2951   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2952 
2953   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2954 
2955   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2956 
2957   for (i=0; i<m; i++) {
2958     ourlens[i] += offlens[i];
2959   }
2960 
2961   if (!rank) {
2962     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
2963 
2964     /* read in my part of the matrix numerical values  */
2965     nz   = procsnz[0];
2966     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2967 
2968     /* insert into matrix */
2969     jj      = rstart;
2970     smycols = mycols;
2971     svals   = vals;
2972     for (i=0; i<m; i++) {
2973       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2974       smycols += ourlens[i];
2975       svals   += ourlens[i];
2976       jj++;
2977     }
2978 
2979     /* read in other processors and ship out */
2980     for (i=1; i<size; i++) {
2981       nz   = procsnz[i];
2982       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2983       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2984     }
2985     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2986   } else {
2987     /* receive numeric values */
2988     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
2989 
2990     /* receive message of values */
2991     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2992 
2993     /* insert into matrix */
2994     jj      = rstart;
2995     smycols = mycols;
2996     svals   = vals;
2997     for (i=0; i<m; i++) {
2998       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2999       smycols += ourlens[i];
3000       svals   += ourlens[i];
3001       jj++;
3002     }
3003   }
3004   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3005   ierr = PetscFree(vals);CHKERRQ(ierr);
3006   ierr = PetscFree(mycols);CHKERRQ(ierr);
3007   ierr = PetscFree(rowners);CHKERRQ(ierr);
3008   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3009   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3010   PetscFunctionReturn(0);
3011 }
3012 
3013 /* Not scalable because of ISAllGather() unless getting all columns. */
3014 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3015 {
3016   PetscErrorCode ierr;
3017   IS             iscol_local;
3018   PetscBool      isstride;
3019   PetscMPIInt    lisstride=0,gisstride;
3020 
3021   PetscFunctionBegin;
3022   /* check if we are grabbing all columns */
3023   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3024 
3025   if (isstride) {
3026     PetscInt  start,len,mstart,mlen;
3027     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3028     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3029     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
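    /* note mlen returned by MatGetOwnershipRangeColumn() is the end of the range, so mlen-mstart is the local column count */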
3030     if (mstart == start && mlen-mstart == len) lisstride = 1;
3031   }
3032 
3033   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3034   if (gisstride) {
3035     PetscInt N;
3036     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3037     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3038     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3039     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3040   } else {
3041     PetscInt cbs;
3042     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3043     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3044     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3045   }
3046 
3047   *isseq = iscol_local;
3048   PetscFunctionReturn(0);
3049 }
3050 
3051 /*
3052  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3053  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3054 
3055  Input Parameters:
3056    mat - matrix
3057    isrow - parallel row index set; its local indices are a subset of local rows of mat,
3058            i.e., mat->rstart <= isrow[i] < mat->rend
3059    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3060            i.e., mat->cstart <= iscol[i] < mat->cend
3061  Output Parameter:
3062    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3063    iscol_o - sequential column index set for retrieving mat->B
3064    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3065  */
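/*
 An illustrative (hypothetical) example: if the global concatenation of iscol is {1,3,7,9} and this
 process's off-diagonal block B uses global columns 3 and 9, numbered 0 and 1 in iscol_o, then
 garray = {1,3}, because iscol[1] = 3 and iscol[3] = 9.
*/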
3066 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3067 {
3068   PetscErrorCode ierr;
3069   Vec            x,cmap;
3070   const PetscInt *is_idx;
3071   PetscScalar    *xarray,*cmaparray;
3072   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3073   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3074   Mat            B=a->B;
3075   Vec            lvec=a->lvec,lcmap;
3076   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3077   MPI_Comm       comm;
3078   VecScatter     Mvctx=a->Mvctx;
3079 
3080   PetscFunctionBegin;
3081   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3082   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3083 
3084   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3085   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3086   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3087   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3088   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
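  /* entries left at -1 serve as sentinels marking columns of mat that iscol does not select */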
3089 
3090   /* Get start indices */
3091   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3092   isstart -= ncols;
3093   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3094 
3095   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3096   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3097   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3098   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3099   for (i=0; i<ncols; i++) {
3100     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3101     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3102     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3103   }
3104   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3105   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3106   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3107 
3108   /* Get iscol_d */
3109   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3110   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3111   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3112 
3113   /* Get isrow_d */
3114   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3115   rstart = mat->rmap->rstart;
3116   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3117   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3118   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3119   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3120 
3121   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3122   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3123   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3124 
3125   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3126   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3127   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3128 
3129   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3130 
3131   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3132   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3133 
3134   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3135   /* off-process column indices */
3136   count = 0;
3137   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3138   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3139 
3140   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3141   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3142   for (i=0; i<Bn; i++) {
3143     if (PetscRealPart(xarray[i]) > -1.0) {
3144       idx[count]     = i;                   /* local column index in off-diagonal part B */
3145       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3146       count++;
3147     }
3148   }
3149   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3150   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3151 
3152   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3153   /* cannot ensure iscol_o has same blocksize as iscol! */
3154 
3155   ierr = PetscFree(idx);CHKERRQ(ierr);
3156   *garray = cmap1;
3157 
3158   ierr = VecDestroy(&x);CHKERRQ(ierr);
3159   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3160   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3161   PetscFunctionReturn(0);
3162 }
3163 
3164 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3165 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3166 {
3167   PetscErrorCode ierr;
3168   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3169   Mat            M = NULL;
3170   MPI_Comm       comm;
3171   IS             iscol_d,isrow_d,iscol_o;
3172   Mat            Asub = NULL,Bsub = NULL;
3173   PetscInt       n;
3174 
3175   PetscFunctionBegin;
3176   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3177 
3178   if (call == MAT_REUSE_MATRIX) {
3179     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3180     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3181     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3182 
3183     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3184     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3185 
3186     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3187     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3188 
3189     /* Update diagonal and off-diagonal portions of submat */
3190     asub = (Mat_MPIAIJ*)(*submat)->data;
3191     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3192     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3193     if (n) {
3194       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3195     }
3196     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3197     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3198 
3199   } else { /* call == MAT_INITIAL_MATRIX */
3200     const PetscInt *garray;
3201     PetscInt        BsubN;
3202 
3203     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3204     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3205 
3206     /* Create local submatrices Asub and Bsub */
3207     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3208     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3209 
3210     /* Create submatrix M */
3211     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3212 
3213     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3214     asub = (Mat_MPIAIJ*)M->data;
3215 
3216     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3217     n = asub->B->cmap->N;
3218     if (BsubN > n) {
3219       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3220       const PetscInt *idx;
3221       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3222       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3223 
3224       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3225       j = 0;
3226       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3227       for (i=0; i<n; i++) {
3228         if (j >= BsubN) break;
3229         while (subgarray[i] > garray[j]) j++;
3230 
3231         if (subgarray[i] == garray[j]) {
3232           idx_new[i] = idx[j++];
3233         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3234       }
3235       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3236 
3237       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3238       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3239 
3240     } else if (BsubN < n) {
3241       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be smaller than B's (%D)",BsubN,asub->B->cmap->N);
3242     }
3243 
3244     ierr = PetscFree(garray);CHKERRQ(ierr);
3245     *submat = M;
3246 
3247     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3248     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3249     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3250 
3251     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3252     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3253 
3254     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3255     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3256   }
3257   PetscFunctionReturn(0);
3258 }
3259 
3260 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3261 {
3262   PetscErrorCode ierr;
3263   IS             iscol_local=NULL,isrow_d;
3264   PetscInt       csize;
3265   PetscInt       n,i,j,start,end;
3266   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3267   MPI_Comm       comm;
3268 
3269   PetscFunctionBegin;
3270   /* If isrow has same processor distribution as mat,
3271      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3272   if (call == MAT_REUSE_MATRIX) {
3273     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3274     if (isrow_d) {
3275       sameRowDist  = PETSC_TRUE;
3276       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3277     } else {
3278       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3279       if (iscol_local) {
3280         sameRowDist  = PETSC_TRUE;
3281         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3282       }
3283     }
3284   } else {
3285     /* Check if isrow has same processor distribution as mat */
3286     sameDist[0] = PETSC_FALSE;
3287     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3288     if (!n) {
3289       sameDist[0] = PETSC_TRUE;
3290     } else {
3291       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3292       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3293       if (i >= start && j < end) {
3294         sameDist[0] = PETSC_TRUE;
3295       }
3296     }
3297 
3298     /* Check if iscol has same processor distribution as mat */
3299     sameDist[1] = PETSC_FALSE;
3300     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3301     if (!n) {
3302       sameDist[1] = PETSC_TRUE;
3303     } else {
3304       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3305       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3306       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3307     }
3308 
3309     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
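    /* a logical AND reduction makes the same-distribution tests global decisions: every process must agree before a fast path is taken */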
3310     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3311     sameRowDist = tsameDist[0];
3312   }
3313 
3314   if (sameRowDist) {
3315     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3316       /* isrow and iscol have same processor distribution as mat */
3317       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3318       PetscFunctionReturn(0);
3319     } else { /* sameRowDist */
3320       /* isrow has same processor distribution as mat */
3321       if (call == MAT_INITIAL_MATRIX) {
3322         PetscBool sorted;
3323         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3324         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3325         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3326         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3327 
3328         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3329         if (sorted) {
3330           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3331           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3332           PetscFunctionReturn(0);
3333         }
3334       } else { /* call == MAT_REUSE_MATRIX */
3335         IS    iscol_sub;
3336         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3337         if (iscol_sub) {
3338           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3339           PetscFunctionReturn(0);
3340         }
3341       }
3342     }
3343   }
3344 
3345   /* General case: iscol -> iscol_local which has global size of iscol */
3346   if (call == MAT_REUSE_MATRIX) {
3347     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3348     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3349   } else {
3350     if (!iscol_local) {
3351       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3352     }
3353   }
3354 
3355   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3356   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3357 
3358   if (call == MAT_INITIAL_MATRIX) {
3359     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3360     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3361   }
3362   PetscFunctionReturn(0);
3363 }
3364 
3365 /*@C
3366      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3367          and "off-diagonal" parts of the matrix in CSR format.
3368 
3369    Collective on MPI_Comm
3370 
3371    Input Parameters:
3372 +  comm - MPI communicator
3373 .  A - "diagonal" portion of matrix
3374 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3375 -  garray - global index of B columns
3376 
3377    Output Parameter:
3378 .   mat - the matrix, with input A as its local diagonal matrix

3379    Level: advanced
3380 
3381    Notes:
3382        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3383        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
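
       A typical call sequence is sketched below (illustrative only; building Asub, Bsub and the
       column map garray, e.g. with MatCreateSubMatrix_SeqAIJ(), is the caller's responsibility):
.vb
       Mat            Asub,Bsub,M;
       const PetscInt *garray;
       ... create sequential Asub (m x n), Bsub (m x number of ghost columns),
           and garray[] giving the global column of each Bsub column ...
       MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);
.ve
       After this call Asub and Bsub belong to M and must not be used or destroyed by the caller.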
3384 
3385 .seealso: MatCreateMPIAIJWithSplitArrays()
3386 @*/
3387 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3388 {
3389   PetscErrorCode ierr;
3390   Mat_MPIAIJ     *maij;
3391   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3392   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3393   PetscScalar    *oa=b->a;
3394   Mat            Bnew;
3395   PetscInt       m,n,N;
3396 
3397   PetscFunctionBegin;
3398   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3399   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3400   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3401   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3402   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3403   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3404 
3405   /* Get global columns of mat */
3406   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3407 
3408   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3409   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3410   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3411   maij = (Mat_MPIAIJ*)(*mat)->data;
3412 
3413   (*mat)->preallocated = PETSC_TRUE;
3414 
3415   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3416   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3417 
3418   /* Set A as diagonal portion of *mat */
3419   maij->A = A;
3420 
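  /* map B's column indices, which are compact local indices, back to global column indices via garray */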
3421   nz = oi[m];
3422   for (i=0; i<nz; i++) {
3423     col   = oj[i];
3424     oj[i] = garray[col];
3425   }
3426 
3427    /* Set Bnew as off-diagonal portion of *mat */
3428   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3429   bnew        = (Mat_SeqAIJ*)Bnew->data;
3430   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3431   maij->B     = Bnew;
3432 
3433   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3434 
3435   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3436   b->free_a       = PETSC_FALSE;
3437   b->free_ij      = PETSC_FALSE;
3438   ierr = MatDestroy(&B);CHKERRQ(ierr);
3439 
3440   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3441   bnew->free_a       = PETSC_TRUE;
3442   bnew->free_ij      = PETSC_TRUE;
3443 
3444   /* condense columns of maij->B */
3445   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3446   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3447   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3448   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3449   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3450   PetscFunctionReturn(0);
3451 }
3452 
3453 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3454 
3455 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3456 {
3457   PetscErrorCode ierr;
3458   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3459   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3460   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3461   Mat            M,Msub,B=a->B;
3462   MatScalar      *aa;
3463   Mat_SeqAIJ     *aij;
3464   PetscInt       *garray = a->garray,*colsub,Ncols;
3465   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3466   IS             iscol_sub,iscmap;
3467   const PetscInt *is_idx,*cmap;
3468   PetscBool      allcolumns=PETSC_FALSE;
3469   MPI_Comm       comm;
3470 
3471   PetscFunctionBegin;
3472   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3473 
3474   if (call == MAT_REUSE_MATRIX) {
3475     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3476     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3477     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3478 
3479     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3480     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3481 
3482     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3483     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3484 
3485     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3486 
3487   } else { /* call == MAT_INITIAL_MATRIX */
3488     PetscBool flg;
3489 
3490     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3491     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3492 
3493     /* (1) iscol -> nonscalable iscol_local */
3494     /* Check for special case: each processor gets entire matrix columns */
3495     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3496     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3497     if (allcolumns) {
3498       iscol_sub = iscol_local;
3499       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3500       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3501 
3502     } else {
3503       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3504       PetscInt *idx,*cmap1,k;
3505       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3506       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3507       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3508       count = 0;
3509       k     = 0;
3510       for (i=0; i<Ncols; i++) {
3511         j = is_idx[i];
3512         if (j >= cstart && j < cend) {
3513           /* diagonal part of mat */
3514           idx[count]     = j;
3515           cmap1[count++] = i; /* column index in submat */
3516         } else if (Bn) {
3517           /* off-diagonal part of mat */
3518           if (j == garray[k]) {
3519             idx[count]     = j;
3520             cmap1[count++] = i;  /* column index in submat */
3521           } else if (j > garray[k]) {
3522             while (j > garray[k] && k < Bn-1) k++;
3523             if (j == garray[k]) {
3524               idx[count]     = j;
3525               cmap1[count++] = i; /* column index in submat */
3526             }
3527           }
3528         }
3529       }
3530       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3531 
3532       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3533       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3534       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3535 
3536       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3537     }
3538 
3539     /* (3) Create sequential Msub */
3540     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3541   }
3542 
3543   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3544   aij  = (Mat_SeqAIJ*)(Msub)->data;
3545   ii   = aij->i;
3546   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3547 
3548   /*
3549       m - number of local rows
3550       Ncols - number of columns (same on all processors)
3551       rstart - first row in new global matrix generated
3552   */
3553   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3554 
3555   if (call == MAT_INITIAL_MATRIX) {
3556     /* (4) Create parallel newmat */
3557     PetscMPIInt    rank,size;
3558     PetscInt       csize;
3559 
3560     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3561     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3562 
3563     /*
3564         Determine the number of non-zeros in the diagonal and off-diagonal
3565         portions of the matrix in order to do correct preallocation
3566     */
3567 
3568     /* first get start and end of "diagonal" columns */
3569     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3570     if (csize == PETSC_DECIDE) {
3571       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3572       if (mglobal == Ncols) { /* square matrix */
3573         nlocal = m;
3574       } else {
3575         nlocal = Ncols/size + ((Ncols % size) > rank);
3576       }
3577     } else {
3578       nlocal = csize;
3579     }
3580     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3581     rstart = rend - nlocal;
3582     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3583 
3584     /* next, compute all the lengths */
3585     jj    = aij->j;
3586     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3587     olens = dlens + m;
3588     for (i=0; i<m; i++) {
3589       jend = ii[i+1] - ii[i];
3590       olen = 0;
3591       dlen = 0;
3592       for (j=0; j<jend; j++) {
3593         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3594         else dlen++;
3595         jj++;
3596       }
3597       olens[i] = olen;
3598       dlens[i] = dlen;
3599     }
3600 
3601     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3602     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3603 
3604     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3605     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3606     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3607     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3608     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3609     ierr = PetscFree(dlens);CHKERRQ(ierr);
3610 
3611   } else { /* call == MAT_REUSE_MATRIX */
3612     M    = *newmat;
3613     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3614     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3615     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3616     /*
3617          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3618        rather than the slower MatSetValues().
3619     */
3620     M->was_assembled = PETSC_TRUE;
3621     M->assembled     = PETSC_FALSE;
3622   }
3623 
3624   /* (5) Set values of Msub to *newmat */
3625   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3626   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3627 
3628   jj   = aij->j;
3629   aa   = aij->a;
3630   for (i=0; i<m; i++) {
3631     row = rstart + i;
3632     nz  = ii[i+1] - ii[i];
3633     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3634     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3635     jj += nz; aa += nz;
3636   }
3637   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3638 
3639   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3640   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3641 
3642   ierr = PetscFree(colsub);CHKERRQ(ierr);
3643 
3644   /* save Msub, iscol_sub and iscmap used in processor for next request */
3645   if (call ==  MAT_INITIAL_MATRIX) {
3646     *newmat = M;
3647     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3648     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3649 
3650     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3651     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3652 
3653     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3654     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3655 
3656     if (iscol_local) {
3657       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3658       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3659     }
3660   }
3661   PetscFunctionReturn(0);
3662 }
3663 
3664 /*
3665     Not great since it makes two copies of the submatrix: first a SeqAIJ
3666   locally, and then the end result by concatenating the local matrices.
3667   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3668 
3669   Note: This requires a sequential iscol with all indices.
3670 */
3671 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3672 {
3673   PetscErrorCode ierr;
3674   PetscMPIInt    rank,size;
3675   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3676   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3677   Mat            M,Mreuse;
3678   MatScalar      *aa,*vwork;
3679   MPI_Comm       comm;
3680   Mat_SeqAIJ     *aij;
3681   PetscBool      colflag,allcolumns=PETSC_FALSE;
3682 
3683   PetscFunctionBegin;
3684   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3685   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3686   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3687 
3688   /* Check for special case: each processor gets entire matrix columns */
3689   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3690   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3691   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3692 
3693   if (call ==  MAT_REUSE_MATRIX) {
3694     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3695     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3696     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3697   } else {
3698     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3699   }
3700 
3701   /*
3702       m - number of local rows
3703       n - number of columns (same on all processors)
3704       rstart - first row in new global matrix generated
3705   */
3706   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3707   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3708   if (call == MAT_INITIAL_MATRIX) {
3709     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3710     ii  = aij->i;
3711     jj  = aij->j;
3712 
3713     /*
3714         Determine the number of non-zeros in the diagonal and off-diagonal
3715         portions of the matrix in order to do correct preallocation
3716     */
3717 
3718     /* first get start and end of "diagonal" columns */
3719     if (csize == PETSC_DECIDE) {
3720       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3721       if (mglobal == n) { /* square matrix */
3722         nlocal = m;
3723       } else {
3724         nlocal = n/size + ((n % size) > rank);
3725       }
3726     } else {
3727       nlocal = csize;
3728     }
3729     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3730     rstart = rend - nlocal;
3731     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3732 
3733     /* next, compute all the lengths */
3734     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3735     olens = dlens + m;
3736     for (i=0; i<m; i++) {
3737       jend = ii[i+1] - ii[i];
3738       olen = 0;
3739       dlen = 0;
3740       for (j=0; j<jend; j++) {
3741         if (*jj < rstart || *jj >= rend) olen++;
3742         else dlen++;
3743         jj++;
3744       }
3745       olens[i] = olen;
3746       dlens[i] = dlen;
3747     }
3748     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3749     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3750     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3751     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3752     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3753     ierr = PetscFree(dlens);CHKERRQ(ierr);
3754   } else {
3755     PetscInt ml,nl;
3756 
3757     M    = *newmat;
3758     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3759     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3760     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3761     /*
3762          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3763        rather than the slower MatSetValues().
3764     */
3765     M->was_assembled = PETSC_TRUE;
3766     M->assembled     = PETSC_FALSE;
3767   }
3768   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3769   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3770   ii   = aij->i;
3771   jj   = aij->j;
3772   aa   = aij->a;
3773   for (i=0; i<m; i++) {
3774     row   = rstart + i;
3775     nz    = ii[i+1] - ii[i];
3776     cwork = jj;     jj += nz;
3777     vwork = aa;     aa += nz;
3778     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3779   }
3780 
3781   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3782   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3783   *newmat = M;
3784 
3785   /* save submatrix used in processor for next request */
3786   if (call ==  MAT_INITIAL_MATRIX) {
3787     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3788     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3789   }
3790   PetscFunctionReturn(0);
3791 }
3792 
3793 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3794 {
3795   PetscInt       m,cstart, cend,j,nnz,i,d;
3796   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3797   const PetscInt *JJ;
3798   PetscScalar    *values;
3799   PetscErrorCode ierr;
3800   PetscBool      nooffprocentries;
3801 
3802   PetscFunctionBegin;
3803   if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3804 
3805   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3806   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3807   m      = B->rmap->n;
3808   cstart = B->cmap->rstart;
3809   cend   = B->cmap->rend;
3810   rstart = B->rmap->rstart;
3811 
3812   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3813 
3814 #if defined(PETSC_USE_DEBUG)
3815   for (i=0; i<m; i++) {
3816     nnz = Ii[i+1]- Ii[i];
3817     JJ  = J + Ii[i];
3818     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3819     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3820     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3821   }
3822 #endif
3823 
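  /* Count, for each local row, how many of its column indices fall inside the diagonal
     block [cstart,cend) versus outside it; these counts drive the preallocation below */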
3824   for (i=0; i<m; i++) {
3825     nnz     = Ii[i+1]- Ii[i];
3826     JJ      = J + Ii[i];
3827     nnz_max = PetscMax(nnz_max,nnz);
3828     d       = 0;
3829     for (j=0; j<nnz; j++) {
3830       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3831     }
3832     d_nnz[i] = d;
3833     o_nnz[i] = nnz - d;
3834   }
3835   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3836   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3837 
3838   if (v) values = (PetscScalar*)v;
3839   else {
3840     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3841   }
3842 
3843   for (i=0; i<m; i++) {
3844     ii   = i + rstart;
3845     nnz  = Ii[i+1]- Ii[i];
3846     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3847   }
3848   nooffprocentries    = B->nooffprocentries;
3849   B->nooffprocentries = PETSC_TRUE;
3850   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3851   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3852   B->nooffprocentries = nooffprocentries;
3853 
3854   if (!v) {
3855     ierr = PetscFree(values);CHKERRQ(ierr);
3856   }
3857   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3858   PetscFunctionReturn(0);
3859 }
3860 
3861 /*@
3862    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3863    (the default parallel PETSc format).
3864 
3865    Collective on MPI_Comm
3866 
3867    Input Parameters:
3868 +  B - the matrix
3869 .  i - the indices into j for the start of each local row (starts with zero)
3870 .  j - the column indices for each local row (starts with zero)
3871 -  v - optional values in the matrix
3872 
3873    Level: developer
3874 
3875    Notes:
3876        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3877      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3878      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3879 
3880       The i and j indices are 0 based, and the i indices are offsets into the local j array.
3881 
3882       The format used for the sparse matrix input is equivalent to a
3883     row-major ordering, i.e., for the following matrix the expected input data is
3884     as shown:
3885 
3886 $        1 0 0
3887 $        2 0 3     P0
3888 $       -------
3889 $        4 5 6     P1
3890 $
3891 $     Process0 [P0]: rows_owned=[0,1]
3892 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3893 $        j =  {0,0,2}  [size = 3]
3894 $        v =  {1,2,3}  [size = 3]
3895 $
3896 $     Process1 [P1]: rows_owned=[2]
3897 $        i =  {0,3}    [size = nrow+1  = 1+1]
3898 $        j =  {0,1,2}  [size = 3]
3899 $        v =  {4,5,6}  [size = 3]
3900 
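   A minimal call sequence for assembling the matrix above with this routine is
   sketched below (error checking omitted; nrowslocal is 2 on P0 and 1 on P1, and
   i, j, v are the local arrays shown above):

.vb
     MatCreate(comm,&B);
     MatSetSizes(B,nrowslocal,PETSC_DECIDE,3,3);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve
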
3901 .keywords: matrix, aij, compressed row, sparse, parallel
3902 
3903 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3904           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3905 @*/
3906 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3907 {
3908   PetscErrorCode ierr;
3909 
3910   PetscFunctionBegin;
3911   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3912   PetscFunctionReturn(0);
3913 }
3914 
3915 /*@C
3916    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3917    (the default parallel PETSc format).  For good matrix assembly performance
3918    the user should preallocate the matrix storage by setting the parameters
3919    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3920    performance can be increased by more than a factor of 50.
3921 
3922    Collective on MPI_Comm
3923 
3924    Input Parameters:
3925 +  B - the matrix
3926 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3927            (same value is used for all local rows)
3928 .  d_nnz - array containing the number of nonzeros in the various rows of the
3929            DIAGONAL portion of the local submatrix (possibly different for each row)
3930            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3931            The size of this array is equal to the number of local rows, i.e 'm'.
3932            For matrices that will be factored, you must leave room for (and set)
3933            the diagonal entry even if it is zero.
3934 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3935            submatrix (same value is used for all local rows).
3936 -  o_nnz - array containing the number of nonzeros in the various rows of the
3937            OFF-DIAGONAL portion of the local submatrix (possibly different for
3938            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3939            structure. The size of this array is equal to the number
3940            of local rows, i.e 'm'.
3941 
3942    If the *_nnz parameter is given then the *_nz parameter is ignored
3943 
3944    The AIJ format (also called the Yale sparse matrix format or
3945    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3946    storage.  The stored row and column indices begin with zero.
3947    See Users-Manual: ch_mat for details.
3948 
3949    The parallel matrix is partitioned such that the first m0 rows belong to
3950    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3951    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3952 
3953    The DIAGONAL portion of the local submatrix of a processor can be defined
3954    as the submatrix which is obtained by extracting the part corresponding to
3955    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3956    first row that belongs to the processor, r2 is the last row belonging to
3957    this processor, and c1-c2 is the range of indices of the local part of a
3958    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
3959    common case of a square matrix, the row and column ranges are the same and
3960    the DIAGONAL part is also square. The remaining portion of the local
3961    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3962 
3963    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
3964 
3965    You can call MatGetInfo() to get information on how effective the preallocation was;
3966    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3967    You can also run with the option -info and look for messages with the string
3968    malloc in them to see if additional memory allocation was needed.
3969 
3970    Example usage:
3971 
3972    Consider the following 8x8 matrix with 34 non-zero values, that is
3973    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3974    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3975    as follows:
3976 
3977 .vb
3978             1  2  0  |  0  3  0  |  0  4
3979     Proc0   0  5  6  |  7  0  0  |  8  0
3980             9  0 10  | 11  0  0  | 12  0
3981     -------------------------------------
3982            13  0 14  | 15 16 17  |  0  0
3983     Proc1   0 18  0  | 19 20 21  |  0  0
3984             0  0  0  | 22 23  0  | 24  0
3985     -------------------------------------
3986     Proc2  25 26 27  |  0  0 28  | 29  0
3987            30  0  0  | 31 32 33  |  0 34
3988 .ve
3989 
3990    This can be represented as a collection of submatrices as:
3991 
3992 .vb
3993       A B C
3994       D E F
3995       G H I
3996 .ve
3997 
3998    Where the submatrices A,B,C are owned by proc0, D,E,F are
3999    owned by proc1, G,H,I are owned by proc2.
4000 
4001    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4002    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4003    The 'M','N' parameters are 8,8, and have the same values on all procs.
4004 
4005    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4006    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4007    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4008    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4009    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4010    matrix, and [DF] as another SeqAIJ matrix.
4011 
4012    When d_nz, o_nz parameters are specified, d_nz storage elements are
4013    allocated for every row of the local diagonal submatrix, and o_nz
4014    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4015    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4016    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4017    In this case, the values of d_nz,o_nz are:
4018 .vb
4019      proc0 : dnz = 2, o_nz = 2
4020      proc1 : dnz = 3, o_nz = 2
4021      proc2 : dnz = 1, o_nz = 4
4022 .ve
4023    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4024    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4025    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4026    34 values.
4027 
4028    When d_nnz, o_nnz parameters are specified, the storage is specified
4029    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4030    In the above case the values for d_nnz,o_nnz are:
4031 .vb
4032      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4033      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4034      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4035 .ve
4036    Here the space allocated is the sum of all the above values, i.e., 34, and
4037    hence the preallocation is perfect.
4038 
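   For instance, proc1 in the example above could preallocate with the sketch
   below (assuming B was already created with MatCreate(), MatSetSizes(), and
   MatSetType()):

.vb
     PetscInt d_nnz[3] = {3,3,2},o_nnz[3] = {2,1,1};
     MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve
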
4039    Level: intermediate
4040 
4041 .keywords: matrix, aij, compressed row, sparse, parallel
4042 
4043 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4044           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4045 @*/
4046 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4047 {
4048   PetscErrorCode ierr;
4049 
4050   PetscFunctionBegin;
4051   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4052   PetscValidType(B,1);
4053   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4054   PetscFunctionReturn(0);
4055 }
4056 
4057 /*@
4058      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4059          CSR format the local rows.
4060 
4061    Collective on MPI_Comm
4062 
4063    Input Parameters:
4064 +  comm - MPI communicator
4065 .  m - number of local rows (Cannot be PETSC_DECIDE)
4066 .  n - This value should be the same as the local size used in creating the
4067        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
4068        it calculated if N is given). For square matrices n is almost always m.
4069 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4070 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4071 .   i - the indices into j for the start of each local row (starts with zero)
4072 .   j - the column indices for each local row (starts with zero)
4073 -   a - the matrix values
4074 
4075    Output Parameter:
4076 .   mat - the matrix
4077 
4078    Level: intermediate
4079 
4080    Notes:
4081        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4082      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4083      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4084 
4085       The i and j indices are 0 based, and the i indices are offsets into the local j array.
4086 
4087       The format used for the sparse matrix input is equivalent to a
4088     row-major ordering, i.e., for the following matrix the expected input data is
4089     as shown:
4090 
4091 $        1 0 0
4092 $        2 0 3     P0
4093 $       -------
4094 $        4 5 6     P1
4095 $
4096 $     Process0 [P0]: rows_owned=[0,1]
4097 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4098 $        j =  {0,0,2}  [size = 3]
4099 $        v =  {1,2,3}  [size = 3]
4100 $
4101 $     Process1 [P1]: rows_owned=[2]
4102 $        i =  {0,3}    [size = nrow+1  = 1+1]
4103 $        j =  {0,1,2}  [size = 3]
4104 $        v =  {4,5,6}  [size = 3]
4105 
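   A sketch of the corresponding call on P0 (M and N could equally be passed as
   PETSC_DETERMINE since the local sizes are given):

.vb
     PetscInt    i[] = {0,1,3},j[] = {0,0,2};
     PetscScalar v[] = {1,2,3};
     MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,v,&A);
.ve
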
4106 .keywords: matrix, aij, compressed row, sparse, parallel
4107 
4108 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4109           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4110 @*/
4111 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4112 {
4113   PetscErrorCode ierr;
4114 
4115   PetscFunctionBegin;
4116   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4117   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
4118   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4119   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4120   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4121   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4122   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4123   PetscFunctionReturn(0);
4124 }
4125 
4126 /*@C
4127    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4128    (the default parallel PETSc format).  For good matrix assembly performance
4129    the user should preallocate the matrix storage by setting the parameters
4130    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4131    performance can be increased by more than a factor of 50.
4132 
4133    Collective on MPI_Comm
4134 
4135    Input Parameters:
4136 +  comm - MPI communicator
4137 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4138            This value should be the same as the local size used in creating the
4139            y vector for the matrix-vector product y = Ax.
4140 .  n - This value should be the same as the local size used in creating the
4141        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
4142        it calculated if N is given). For square matrices n is almost always m.
4143 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4144 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4145 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4146            (same value is used for all local rows)
4147 .  d_nnz - array containing the number of nonzeros in the various rows of the
4148            DIAGONAL portion of the local submatrix (possibly different for each row)
4149            or NULL, if d_nz is used to specify the nonzero structure.
4150            The size of this array is equal to the number of local rows, i.e 'm'.
4151 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4152            submatrix (same value is used for all local rows).
4153 -  o_nnz - array containing the number of nonzeros in the various rows of the
4154            OFF-DIAGONAL portion of the local submatrix (possibly different for
4155            each row) or NULL, if o_nz is used to specify the nonzero
4156            structure. The size of this array is equal to the number
4157            of local rows, i.e 'm'.
4158 
4159    Output Parameter:
4160 .  A - the matrix
4161 
4162    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4163    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4164    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4165 
4166    Notes:
4167    If the *_nnz parameter is given then the *_nz parameter is ignored
4168 
4169    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4170    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4171    storage requirements for this matrix.
4172 
4173    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4174    processor then it must be used on all processors that share the object for
4175    that argument.
4176 
4177    The user MUST specify either the local or global matrix dimensions
4178    (possibly both).
4179 
4180    The parallel matrix is partitioned across processors such that the
4181    first m0 rows belong to process 0, the next m1 rows belong to
4182    process 1, the next m2 rows belong to process 2, etc., where
4183    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4184    values corresponding to an [m x N] submatrix.
4185 
4186    The columns are logically partitioned with the n0 columns belonging
4187    to the 0th partition, the next n1 columns belonging to the next
4188    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4189 
4190    The DIAGONAL portion of the local submatrix on any given processor
4191    is the submatrix corresponding to the rows and columns m,n owned
4192    by that processor, i.e., the diagonal matrix on
4193    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4194    etc. The remaining portion of the local submatrix [m x (N-n)]
4195    constitutes the OFF-DIAGONAL portion. The example below better
4196    illustrates this concept.
4197 
4198    For a square global matrix we define each processor's diagonal portion
4199    to be its local rows and the corresponding columns (a square submatrix);
4200    each processor's off-diagonal portion encompasses the remainder of the
4201    local matrix (a rectangular submatrix).
4202 
4203    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
4204 
4205    When calling this routine with a single process communicator, a matrix of
4206    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4207    type of communicator, use the construction mechanism
4208 .vb
4209      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4210 .ve
4216 
4217    By default, this format uses inodes (identical nodes) when possible.
4218    We search for consecutive rows with the same nonzero structure, thereby
4219    reusing matrix information to achieve increased efficiency.
4220 
4221    Options Database Keys:
4222 +  -mat_no_inode  - Do not use inodes
4223 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4224 
4227    Example usage:
4228 
4229    Consider the following 8x8 matrix with 34 non-zero values, that is
4230    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4231    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4232    as follows
4233 
4234 .vb
4235             1  2  0  |  0  3  0  |  0  4
4236     Proc0   0  5  6  |  7  0  0  |  8  0
4237             9  0 10  | 11  0  0  | 12  0
4238     -------------------------------------
4239            13  0 14  | 15 16 17  |  0  0
4240     Proc1   0 18  0  | 19 20 21  |  0  0
4241             0  0  0  | 22 23  0  | 24  0
4242     -------------------------------------
4243     Proc2  25 26 27  |  0  0 28  | 29  0
4244            30  0  0  | 31 32 33  |  0 34
4245 .ve
4246 
4247    This can be represented as a collection of submatrices as
4248 
4249 .vb
4250       A B C
4251       D E F
4252       G H I
4253 .ve
4254 
4255    Where the submatrices A,B,C are owned by proc0, D,E,F are
4256    owned by proc1, G,H,I are owned by proc2.
4257 
4258    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4259    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4260    The 'M','N' parameters are 8,8, and have the same values on all procs.
4261 
4262    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4263    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4264    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4265    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4266    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4267    matrix, and [DF] as another SeqAIJ matrix.
4268 
4269    When d_nz, o_nz parameters are specified, d_nz storage elements are
4270    allocated for every row of the local diagonal submatrix, and o_nz
4271    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4272    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4273    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4274    In this case, the values of d_nz,o_nz are
4275 .vb
4276      proc0 : dnz = 2, o_nz = 2
4277      proc1 : dnz = 3, o_nz = 2
4278      proc2 : dnz = 1, o_nz = 4
4279 .ve
4280    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4281    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4282    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4283    34 values.
4284 
4285    When d_nnz, o_nnz parameters are specified, the storage is specified
4286    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4287    In the above case the values for d_nnz,o_nnz are
4288 .vb
4289      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4290      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4291      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4292 .ve
4293    Here the space allocated is the sum of all the above values, i.e., 34, and
4294    hence the preallocation is perfect.
4295 
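   Thus proc0 in the example above could create its share of the matrix with the
   sketch below (using the d_nnz/o_nnz values listed above):

.vb
     PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
     MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
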
4296    Level: intermediate
4297 
4298 .keywords: matrix, aij, compressed row, sparse, parallel
4299 
4300 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4301           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4302 @*/
4303 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4304 {
4305   PetscErrorCode ierr;
4306   PetscMPIInt    size;
4307 
4308   PetscFunctionBegin;
4309   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4310   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4311   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4312   if (size > 1) {
4313     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4314     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4315   } else {
4316     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4317     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4318   }
4319   PetscFunctionReturn(0);
4320 }
4321 
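/* MatMPIAIJGetSeqAIJ - returns the diagonal (Ad) and off-diagonal (Ao) SeqAIJ blocks of an
   MPIAIJ matrix, plus the map (colmap) from Ao's local column numbers to global column numbers;
   any of the output arguments may be NULL if not needed */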
4322 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4323 {
4324   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4325   PetscBool      flg;
4326   PetscErrorCode ierr;
4327 
4328   PetscFunctionBegin;
4329   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
4330   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4331   if (Ad)     *Ad     = a->A;
4332   if (Ao)     *Ao     = a->B;
4333   if (colmap) *colmap = a->garray;
4334   PetscFunctionReturn(0);
4335 }
4336 
4337 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4338 {
4339   PetscErrorCode ierr;
4340   PetscInt       m,N,i,rstart,nnz,Ii;
4341   PetscInt       *indx;
4342   PetscScalar    *values;
4343 
4344   PetscFunctionBegin;
4345   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4346   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4347     PetscInt       *dnz,*onz,sum,bs,cbs;
4348 
4349     if (n == PETSC_DECIDE) {
4350       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4351     }
4352     /* Check sum(n) = N */
4353     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4354     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4355 
4356     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4357     rstart -= m;
4358 
4359     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4360     for (i=0; i<m; i++) {
4361       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4362       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4363       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4364     }
4365 
4366     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4367     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4368     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4369     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4370     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4371     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4372     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4373     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4374   }
4375 
4376   /* numeric phase */
4377   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4378   for (i=0; i<m; i++) {
4379     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4380     Ii   = i + rstart;
4381     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4382     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4383   }
4384   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4385   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4386   PetscFunctionReturn(0);
4387 }
4388 
4389 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4390 {
4391   PetscErrorCode    ierr;
4392   PetscMPIInt       rank;
4393   PetscInt          m,N,i,rstart,nnz;
4394   size_t            len;
4395   const PetscInt    *indx;
4396   PetscViewer       out;
4397   char              *name;
4398   Mat               B;
4399   const PetscScalar *values;
4400 
4401   PetscFunctionBegin;
4402   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4403   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4404   /* Should this be the type of the diagonal block of A? */
4405   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4406   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4407   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4408   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4409   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4410   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4411   for (i=0; i<m; i++) {
4412     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4413     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4414     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4415   }
4416   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4417   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4418 
4419   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4420   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4421   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4422   sprintf(name,"%s.%d",outfile,rank);
4423   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4424   ierr = PetscFree(name);CHKERRQ(ierr);
4425   ierr = MatView(B,out);CHKERRQ(ierr);
4426   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4427   ierr = MatDestroy(&B);CHKERRQ(ierr);
4428   PetscFunctionReturn(0);
4429 }
4430 
4431 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4432 {
4433   PetscErrorCode      ierr;
4434   Mat_Merge_SeqsToMPI *merge;
4435   PetscContainer      container;
4436 
4437   PetscFunctionBegin;
4438   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4439   if (container) {
4440     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4441     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4442     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4443     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4444     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4445     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4446     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4447     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4448     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4449     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4450     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4451     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4452     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4453     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4454     ierr = PetscFree(merge);CHKERRQ(ierr);
4455     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4456   }
4457   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4458   PetscFunctionReturn(0);
4459 }
4460 
4461 #include <../src/mat/utils/freespace.h>
4462 #include <petscbt.h>
4463 
4464 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4465 {
4466   PetscErrorCode      ierr;
4467   MPI_Comm            comm;
4468   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4469   PetscMPIInt         size,rank,taga,*len_s;
4470   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4471   PetscInt            proc,m;
4472   PetscInt            **buf_ri,**buf_rj;
4473   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4474   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4475   MPI_Request         *s_waits,*r_waits;
4476   MPI_Status          *status;
4477   MatScalar           *aa=a->a;
4478   MatScalar           **abuf_r,*ba_i;
4479   Mat_Merge_SeqsToMPI *merge;
4480   PetscContainer      container;
4481 
4482   PetscFunctionBegin;
4483   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4484   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4485 
4486   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4487   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4488 
4489   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4490   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4491 
4492   bi     = merge->bi;
4493   bj     = merge->bj;
4494   buf_ri = merge->buf_ri;
4495   buf_rj = merge->buf_rj;
4496 
4497   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4498   owners = merge->rowmap->range;
4499   len_s  = merge->len_s;
4500 
4501   /* send and recv matrix values */
4502   /*-----------------------------*/
4503   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4504   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4505 
4506   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4507   for (proc=0,k=0; proc<size; proc++) {
4508     if (!len_s[proc]) continue;
4509     i    = owners[proc];
4510     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4511     k++;
4512   }
4513 
4514   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4515   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4516   ierr = PetscFree(status);CHKERRQ(ierr);
4517 
4518   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4519   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4520 
4521   /* insert mat values of mpimat */
4522   /*----------------------------*/
4523   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4524   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4525 
4526   for (k=0; k<merge->nrecv; k++) {
4527     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4528     nrows       = *(buf_ri_k[k]);
4529     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4530     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4531   }
4532 
4533   /* set values of ba */
4534   m = merge->rowmap->n;
4535   for (i=0; i<m; i++) {
4536     arow = owners[rank] + i;
4537     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4538     bnzi = bi[i+1] - bi[i];
4539     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4540 
4541     /* add local non-zero vals of this proc's seqmat into ba */
4542     anzi   = ai[arow+1] - ai[arow];
4543     aj     = a->j + ai[arow];
4544     aa     = a->a + ai[arow];
4545     nextaj = 0;
4546     for (j=0; nextaj<anzi; j++) {
4547       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4548         ba_i[j] += aa[nextaj++];
4549       }
4550     }
4551 
4552     /* add received vals into ba */
4553     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4554       /* i-th row */
4555       if (i == *nextrow[k]) {
4556         anzi   = *(nextai[k]+1) - *nextai[k];
4557         aj     = buf_rj[k] + *(nextai[k]);
4558         aa     = abuf_r[k] + *(nextai[k]);
4559         nextaj = 0;
4560         for (j=0; nextaj<anzi; j++) {
4561           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4562             ba_i[j] += aa[nextaj++];
4563           }
4564         }
4565         nextrow[k]++; nextai[k]++;
4566       }
4567     }
4568     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4569   }
4570   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4571   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4572 
4573   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4574   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4575   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4576   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4577   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4578   PetscFunctionReturn(0);
4579 }
4580 
4581 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4582 {
4583   PetscErrorCode      ierr;
4584   Mat                 B_mpi;
4585   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4586   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4587   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4588   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4589   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4590   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4591   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4592   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4593   MPI_Status          *status;
4594   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4595   PetscBT             lnkbt;
4596   Mat_Merge_SeqsToMPI *merge;
4597   PetscContainer      container;
4598 
4599   PetscFunctionBegin;
4600   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4601 
4602   /* make sure it is a PETSc comm */
4603   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4604   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4605   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4606 
4607   ierr = PetscNew(&merge);CHKERRQ(ierr);
4608   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4609 
4610   /* determine row ownership */
4611   /*---------------------------------------------------------*/
4612   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4613   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4614   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4615   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4616   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4617   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4618   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4619 
4620   m      = merge->rowmap->n;
4621   owners = merge->rowmap->range;
4622 
4623   /* determine the number of messages to send, their lengths */
4624   /*---------------------------------------------------------*/
4625   len_s = merge->len_s;
4626 
4627   len          = 0; /* length of buf_si[] */
4628   merge->nsend = 0;
4629   for (proc=0; proc<size; proc++) {
4630     len_si[proc] = 0;
4631     if (proc == rank) {
4632       len_s[proc] = 0;
4633     } else {
4634       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4635       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4636     }
4637     if (len_s[proc]) {
4638       merge->nsend++;
4639       nrows = 0;
4640       for (i=owners[proc]; i<owners[proc+1]; i++) {
4641         if (ai[i+1] > ai[i]) nrows++;
4642       }
4643       len_si[proc] = 2*(nrows+1);
4644       len         += len_si[proc];
4645     }
4646   }
4647 
4648   /* determine the number and length of messages to receive for ij-structure */
4649   /*-------------------------------------------------------------------------*/
4650   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4651   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4652 
4653   /* post the Irecv of j-structure */
4654   /*-------------------------------*/
4655   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4656   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4657 
4658   /* post the Isend of j-structure */
4659   /*--------------------------------*/
4660   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4661 
4662   for (proc=0, k=0; proc<size; proc++) {
4663     if (!len_s[proc]) continue;
4664     i    = owners[proc];
4665     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4666     k++;
4667   }
4668 
4669   /* receives and sends of j-structure are complete */
4670   /*------------------------------------------------*/
4671   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4672   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4673 
4674   /* send and recv i-structure */
4675   /*---------------------------*/
4676   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4677   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4678 
4679   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4680   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4681   for (proc=0,k=0; proc<size; proc++) {
4682     if (!len_s[proc]) continue;
4683     /* form outgoing message for i-structure:
4684          buf_si[0]:                 nrows to be sent
4685                [1:nrows]:           row index (global)
4686                [nrows+1:2*nrows+1]: i-structure index
4687     */
4688     /*-------------------------------------------*/
4689     nrows       = len_si[proc]/2 - 1;
4690     buf_si_i    = buf_si + nrows+1;
4691     buf_si[0]   = nrows;
4692     buf_si_i[0] = 0;
4693     nrows       = 0;
4694     for (i=owners[proc]; i<owners[proc+1]; i++) {
4695       anzi = ai[i+1] - ai[i];
4696       if (anzi) {
4697         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4698         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4699         nrows++;
4700       }
4701     }
4702     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4703     k++;
4704     buf_si += len_si[proc];
4705   }
4706 
4707   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4708   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4709 
4710   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4711   for (i=0; i<merge->nrecv; i++) {
4712     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4713   }
4714 
4715   ierr = PetscFree(len_si);CHKERRQ(ierr);
4716   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4717   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4718   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4719   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4720   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4721   ierr = PetscFree(status);CHKERRQ(ierr);
4722 
4723   /* compute a local seq matrix in each processor */
4724   /*----------------------------------------------*/
4725   /* allocate bi array and free space for accumulating nonzero column info */
4726   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4727   bi[0] = 0;
4728 
4729   /* create and initialize a linked list */
4730   nlnk = N+1;
4731   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4732 
4733   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4734   len  = ai[owners[rank+1]] - ai[owners[rank]];
4735   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4736 
4737   current_space = free_space;
4738 
4739   /* determine symbolic info for each local row */
4740   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4741 
4742   for (k=0; k<merge->nrecv; k++) {
4743     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4744     nrows       = *buf_ri_k[k];
4745     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4746     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4747   }
4748 
4749   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4750   len  = 0;
4751   for (i=0; i<m; i++) {
4752     bnzi = 0;
4753     /* add local non-zero cols of this proc's seqmat into lnk */
4754     arow  = owners[rank] + i;
4755     anzi  = ai[arow+1] - ai[arow];
4756     aj    = a->j + ai[arow];
4757     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4758     bnzi += nlnk;
4759     /* add received col data into lnk */
4760     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4761       if (i == *nextrow[k]) { /* i-th row */
4762         anzi  = *(nextai[k]+1) - *nextai[k];
4763         aj    = buf_rj[k] + *nextai[k];
4764         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4765         bnzi += nlnk;
4766         nextrow[k]++; nextai[k]++;
4767       }
4768     }
4769     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4770 
4771     /* if free space is not available, make more free space */
4772     if (current_space->local_remaining<bnzi) {
4773       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4774       nspacedouble++;
4775     }
4776     /* copy data into free space, then initialize lnk */
4777     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4778     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4779 
4780     current_space->array           += bnzi;
4781     current_space->local_used      += bnzi;
4782     current_space->local_remaining -= bnzi;
4783 
4784     bi[i+1] = bi[i] + bnzi;
4785   }
4786 
4787   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4788 
4789   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4790   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4791   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4792 
4793   /* create symbolic parallel matrix B_mpi */
4794   /*---------------------------------------*/
4795   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4796   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4797   if (n==PETSC_DECIDE) {
4798     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4799   } else {
4800     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4801   }
4802   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4803   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4804   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4805   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4806   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4807 
4808   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4809   B_mpi->assembled    = PETSC_FALSE;
4810   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4811   merge->bi           = bi;
4812   merge->bj           = bj;
4813   merge->buf_ri       = buf_ri;
4814   merge->buf_rj       = buf_rj;
4815   merge->coi          = NULL;
4816   merge->coj          = NULL;
4817   merge->owners_co    = NULL;
4818 
4819   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4820 
4821   /* attach the supporting struct to B_mpi for reuse */
4822   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4823   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4824   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4825   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4826   *mpimat = B_mpi;
4827 
4828   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4829   PetscFunctionReturn(0);
4830 }
4831 
4832 /*@C
4833       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4834                  matrices from each processor
4835 
4836     Collective on MPI_Comm
4837 
4838    Input Parameters:
4839 +    comm - the communicator the parallel matrix will live on
4840 .    seqmat - the input sequential matrix
4841 .    m - number of local rows (or PETSC_DECIDE)
4842 .    n - number of local columns (or PETSC_DECIDE)
4843 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4844 
4845    Output Parameter:
4846 .    mpimat - the parallel matrix generated
4847 
4848     Level: advanced
4849 
4850    Notes:
4851      The dimensions of the sequential matrix in each processor MUST be the same.
4852      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4853      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
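
   A typical call sequence is sketched below; with MAT_REUSE_MATRIX only the
   numerical values of seqmat may change between calls, not its nonzero pattern:

.vb
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
     /* ... update the values of seqmat ... */
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);
.ve
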
4854 @*/
4855 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4856 {
4857   PetscErrorCode ierr;
4858   PetscMPIInt    size;
4859 
4860   PetscFunctionBegin;
4861   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4862   if (size == 1) {
4863     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4864     if (scall == MAT_INITIAL_MATRIX) {
4865       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4866     } else {
4867       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4868     }
4869     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4870     PetscFunctionReturn(0);
4871   }
4872   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4873   if (scall == MAT_INITIAL_MATRIX) {
4874     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4875   }
4876   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4877   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4878   PetscFunctionReturn(0);
4879 }
4880 
4881 /*@
4882      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4883           mlocal rows and n columns, where mlocal is the local row count obtained with MatGetLocalSize() and n is the global column count obtained
4884           with MatGetSize()
4885 
4886     Not Collective
4887 
4888    Input Parameters:
4889 +    A - the matrix
4890 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4891 
4892    Output Parameter:
4893 .    A_loc - the local sequential matrix generated
4894 
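   Example usage is sketched below; with MAT_REUSE_MATRIX the second call
   refreshes the values of A_loc after A has been modified and reassembled:

.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     /* ... change the entries of A and reassemble it ... */
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
     MatDestroy(&A_loc);
.ve
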
4895     Level: developer
4896 
4897 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4898 
4899 @*/
4900 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4901 {
4902   PetscErrorCode ierr;
4903   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4904   Mat_SeqAIJ     *mat,*a,*b;
4905   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4906   MatScalar      *aa,*ba,*cam;
4907   PetscScalar    *ca;
4908   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4909   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4910   PetscBool      match;
4911   MPI_Comm       comm;
4912   PetscMPIInt    size;
4913 
4914   PetscFunctionBegin;
4915   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4916   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4917   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4918   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4919   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4920 
4921   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4922   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4923   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4924   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4925   aa = a->a; ba = b->a;
4926   if (scall == MAT_INITIAL_MATRIX) {
4927     if (size == 1) {
4928       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4929       PetscFunctionReturn(0);
4930     }
4931 
4932     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4933     ci[0] = 0;
4934     for (i=0; i<am; i++) {
4935       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4936     }
4937     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4938     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4939     k    = 0;
4940     for (i=0; i<am; i++) {
4941       ncols_o = bi[i+1] - bi[i];
4942       ncols_d = ai[i+1] - ai[i];
4943       /* off-diagonal portion of A */
4944       for (jo=0; jo<ncols_o; jo++) {
4945         col = cmap[*bj];
4946         if (col >= cstart) break;
4947         cj[k]   = col; bj++;
4948         ca[k++] = *ba++;
4949       }
4950       /* diagonal portion of A */
4951       for (j=0; j<ncols_d; j++) {
4952         cj[k]   = cstart + *aj++;
4953         ca[k++] = *aa++;
4954       }
4955       /* off-diagonal portion of A */
4956       for (j=jo; j<ncols_o; j++) {
4957         cj[k]   = cmap[*bj++];
4958         ca[k++] = *ba++;
4959       }
4960     }
4961     /* put together the new matrix */
4962     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4963     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4964     /* Since these are PETSc arrays, change flags to free them as necessary. */
4965     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4966     mat->free_a  = PETSC_TRUE;
4967     mat->free_ij = PETSC_TRUE;
4968     mat->nonew   = 0;
4969   } else if (scall == MAT_REUSE_MATRIX) {
4970     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4971     ci = mat->i; cj = mat->j; cam = mat->a;
4972     for (i=0; i<am; i++) {
4973       /* off-diagonal portion of A */
4974       ncols_o = bi[i+1] - bi[i];
4975       for (jo=0; jo<ncols_o; jo++) {
4976         col = cmap[*bj];
4977         if (col >= cstart) break;
4978         *cam++ = *ba++; bj++;
4979       }
4980       /* diagonal portion of A */
4981       ncols_d = ai[i+1] - ai[i];
4982       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4983       /* off-diagonal portion of A */
4984       for (j=jo; j<ncols_o; j++) {
4985         *cam++ = *ba++; bj++;
4986       }
4987     }
4988   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4989   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4990   PetscFunctionReturn(0);
4991 }
4992 
4993 /*@C
4994      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
4995 
4996     Not Collective
4997 
4998    Input Parameters:
4999 +    A - the matrix
5000 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5001 -    row, col - index sets of rows and columns to extract (or NULL)
5002 
5003    Output Parameter:
5004 .    A_loc - the local sequential matrix generated
5005 
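   A sketch of typical usage; passing NULL for row and col selects all local rows
   and all columns that contain a nonzero in the local part of A:

.vb
     Mat A_loc;
     MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);
.ve
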
5006     Level: developer
5007 
5008 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5009 
5010 @*/
5011 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5012 {
5013   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5014   PetscErrorCode ierr;
5015   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5016   IS             isrowa,iscola;
5017   Mat            *aloc;
5018   PetscBool      match;
5019 
5020   PetscFunctionBegin;
5021   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5022   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5023   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5024   if (!row) {
5025     start = A->rmap->rstart; end = A->rmap->rend;
5026     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5027   } else {
5028     isrowa = *row;
5029   }
5030   if (!col) {
5031     start = A->cmap->rstart;
5032     cmap  = a->garray;
5033     nzA   = a->A->cmap->n;
5034     nzB   = a->B->cmap->n;
5035     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5036     ncols = 0;
5037     for (i=0; i<nzB; i++) {
5038       if (cmap[i] < start) idx[ncols++] = cmap[i];
5039       else break;
5040     }
5041     imark = i;
5042     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5043     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5044     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5045   } else {
5046     iscola = *col;
5047   }
5048   if (scall != MAT_INITIAL_MATRIX) {
5049     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5050     aloc[0] = *A_loc;
5051   }
5052   ierr   = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5053   *A_loc = aloc[0];
5054   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5055   if (!row) {
5056     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5057   }
5058   if (!col) {
5059     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5060   }
5061   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5062   PetscFunctionReturn(0);
5063 }
5064 
5065 /*@C
5066     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5067 
5068     Collective on Mat
5069 
5070    Input Parameters:
5071 +    A,B - the matrices in mpiaij format
5072 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5073 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5074 
5075    Output Parameters:
5076 +    rowb, colb - index sets of the rows and columns of B actually used (created here if NULL on input)
5077 -    B_seq - the sequential matrix generated
5078 
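   Example usage (a minimal sketch; A and B are assumed to be assembled MATMPIAIJ matrices whose
   layouts are compatible, i.e. A's column ownership matches B's row ownership):
.vb
   IS  rowb = NULL,colb = NULL;
   Mat B_seq;
   ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
   ... the index sets are returned so later calls can reuse them ...
   ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
   ierr = ISDestroy(&rowb);CHKERRQ(ierr);
   ierr = ISDestroy(&colb);CHKERRQ(ierr);
   ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
.ve
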
5079     Level: developer
5080 
5081 @*/
5082 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5083 {
5084   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5085   PetscErrorCode ierr;
5086   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5087   IS             isrowb,iscolb;
5088   Mat            *bseq=NULL;
5089 
5090   PetscFunctionBegin;
5091   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5092     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5093   }
5094   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5095 
5096   if (scall == MAT_INITIAL_MATRIX) {
5097     start = A->cmap->rstart;
5098     cmap  = a->garray;
5099     nzA   = a->A->cmap->n;
5100     nzB   = a->B->cmap->n;
5101     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5102     ncols = 0;
5103     for (i=0; i<nzB; i++) {  /* row < local row index */
5104       if (cmap[i] < start) idx[ncols++] = cmap[i];
5105       else break;
5106     }
5107     imark = i;
5108     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5109     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5110     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5111     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5112   } else {
5113     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5114     isrowb  = *rowb; iscolb = *colb;
5115     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5116     bseq[0] = *B_seq;
5117   }
5118   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5119   *B_seq = bseq[0];
5120   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5121   if (!rowb) {
5122     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5123   } else {
5124     *rowb = isrowb;
5125   }
5126   if (!colb) {
5127     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5128   } else {
5129     *colb = iscolb;
5130   }
5131   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5132   PetscFunctionReturn(0);
5133 }
5134 
5135 /*
5136     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to
5137     the nonzero columns of the OFF-DIAGONAL portion of local A
5138 
5139     Collective on Mat
5140 
5141    Input Parameters:
5142 +    A,B - the matrices in mpiaij format
5143 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5144 
5145    Output Parameters:
5146 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5147 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5148 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5149 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5150 
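   Example usage (a minimal sketch; the startsj_s, startsj_r and bufa buffers created by the
   MAT_INITIAL_MATRIX call are passed back in unchanged for MAT_REUSE_MATRIX):

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat       B_oth;
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ... the numerical values of B change; refresh B_oth reusing the saved buffers ...
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
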
5151     Level: developer
5152 
5153 */
5154 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5155 {
5156   VecScatter_MPI_General *gen_to,*gen_from;
5157   PetscErrorCode         ierr;
5158   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5159   Mat_SeqAIJ             *b_oth;
5160   VecScatter             ctx;
5161   MPI_Comm               comm;
5162   PetscMPIInt            *rprocs,*sprocs,tag,rank;
5163   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5164   PetscInt               *rvalues,*svalues,*cols,sbs,rbs;
5165   PetscScalar            *b_otha,*bufa,*bufA,*vals;
5166   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5167   MPI_Request            *rwaits = NULL,*swaits = NULL;
5168   MPI_Status             *sstatus,rstatus;
5169   PetscMPIInt            jj,size;
5170   VecScatterType         type;
5171   PetscBool              mpi1;
5172 
5173   PetscFunctionBegin;
5174   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5175   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5176 
5177   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5178     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5179   }
5180   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5181   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5182 
5183   if (size == 1) {
5184     if (startsj_s) *startsj_s = NULL;
5185     if (bufa_ptr)  *bufa_ptr  = NULL;
5186     *B_oth    = NULL;
5187     PetscFunctionReturn(0);
5188   }
5189 
5190   ctx = a->Mvctx;
5191   ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr);
5192   ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr);
5193   if (!mpi1) {
5194     /* a->Mvctx is not of VecScatter type MPI1, the only type implemented for the Mat-Mat ops,
5195      so create and use a->Mvctx_mpi1 instead */
5196     if (!a->Mvctx_mpi1) {
5197       a->Mvctx_mpi1_flg = PETSC_TRUE;
5198       ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr);
5199     }
5200     ctx = a->Mvctx_mpi1;
5201   }
5202   tag = ((PetscObject)ctx)->tag;
5203 
5204   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5205   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5206   nrecvs   = gen_from->n;
5207   nsends   = gen_to->n;
5208 
5209   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5210   srow    = gen_to->indices;    /* local row index to be sent */
5211   sstarts = gen_to->starts;
5212   sprocs  = gen_to->procs;
5213   sstatus = gen_to->sstatus;
5214   sbs     = gen_to->bs;
5215   rstarts = gen_from->starts;
5216   rprocs  = gen_from->procs;
5217   rbs     = gen_from->bs;
5218 
5219   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5220   if (scall == MAT_INITIAL_MATRIX) {
5221     /* i-array */
5222     /*---------*/
5223     /*  post receives */
5224     ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
5225     for (i=0; i<nrecvs; i++) {
5226       rowlen = rvalues + rstarts[i]*rbs;
5227       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5228       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5229     }
5230 
5231     /* pack the outgoing message */
5232     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5233 
5234     sstartsj[0] = 0;
5235     rstartsj[0] = 0;
5236     len         = 0; /* total length of j or a array to be sent */
5237     k           = 0;
5238     ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
5239     for (i=0; i<nsends; i++) {
5240       rowlen = svalues + sstarts[i]*sbs;
5241       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5242       for (j=0; j<nrows; j++) {
5243         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5244         for (l=0; l<sbs; l++) {
5245           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5246 
5247           rowlen[j*sbs+l] = ncols;
5248 
5249           len += ncols;
5250           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5251         }
5252         k++;
5253       }
5254       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5255 
5256       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5257     }
5258     /* recvs and sends of i-array are completed */
5259     i = nrecvs;
5260     while (i--) {
5261       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5262     }
5263     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5264     ierr = PetscFree(svalues);CHKERRQ(ierr);
5265 
5266     /* allocate buffers for sending j and a arrays */
5267     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5268     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5269 
5270     /* create i-array of B_oth */
5271     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5272 
5273     b_othi[0] = 0;
5274     len       = 0; /* total length of j or a array to be received */
5275     k         = 0;
5276     for (i=0; i<nrecvs; i++) {
5277       rowlen = rvalues + rstarts[i]*rbs;
5278       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5279       for (j=0; j<nrows; j++) {
5280         b_othi[k+1] = b_othi[k] + rowlen[j];
5281         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5282         k++;
5283       }
5284       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5285     }
5286     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5287 
5288     /* allocate space for j and a arrays of B_oth */
5289     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5290     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5291 
5292     /* j-array */
5293     /*---------*/
5294     /*  post receives of j-array */
5295     for (i=0; i<nrecvs; i++) {
5296       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5297       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5298     }
5299 
5300     /* pack the outgoing message j-array */
5301     k = 0;
5302     for (i=0; i<nsends; i++) {
5303       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5304       bufJ  = bufj+sstartsj[i];
5305       for (j=0; j<nrows; j++) {
5306         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5307         for (ll=0; ll<sbs; ll++) {
5308           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5309           for (l=0; l<ncols; l++) {
5310             *bufJ++ = cols[l];
5311           }
5312           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5313         }
5314       }
5315       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5316     }
5317 
5318     /* recvs and sends of j-array are completed */
5319     i = nrecvs;
5320     while (i--) {
5321       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5322     }
5323     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5324   } else if (scall == MAT_REUSE_MATRIX) {
5325     sstartsj = *startsj_s;
5326     rstartsj = *startsj_r;
5327     bufa     = *bufa_ptr;
5328     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5329     b_otha   = b_oth->a;
5330   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5331 
5332   /* a-array */
5333   /*---------*/
5334   /*  post receives of a-array */
5335   for (i=0; i<nrecvs; i++) {
5336     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5337     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5338   }
5339 
5340   /* pack the outgoing message a-array */
5341   k = 0;
5342   for (i=0; i<nsends; i++) {
5343     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5344     bufA  = bufa+sstartsj[i];
5345     for (j=0; j<nrows; j++) {
5346       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5347       for (ll=0; ll<sbs; ll++) {
5348         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5349         for (l=0; l<ncols; l++) {
5350           *bufA++ = vals[l];
5351         }
5352         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5353       }
5354     }
5355     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5356   }
5357   /* recvs and sends of a-array are completed */
5358   i = nrecvs;
5359   while (i--) {
5360     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5361   }
5362   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5363   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5364 
5365   if (scall == MAT_INITIAL_MATRIX) {
5366     /* put together the new matrix */
5367     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5368 
5369     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5370     /* Since these are PETSc arrays, change flags to free them as necessary. */
5371     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5372     b_oth->free_a  = PETSC_TRUE;
5373     b_oth->free_ij = PETSC_TRUE;
5374     b_oth->nonew   = 0;
5375 
5376     ierr = PetscFree(bufj);CHKERRQ(ierr);
5377     if (!startsj_s || !bufa_ptr) {
5378       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5379       ierr = PetscFree(bufa);CHKERRQ(ierr);
5380     } else {
5381       *startsj_s = sstartsj;
5382       *startsj_r = rstartsj;
5383       *bufa_ptr  = bufa;
5384     }
5385   }
5386   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5387   PetscFunctionReturn(0);
5388 }
5389 
5390 /*@C
5391   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5392 
5393   Not Collective
5394 
5395   Input Parameter:
5396 . A - The matrix in mpiaij format
5397 
5398   Output Parameters:
5399 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5400 . colmap - A map from global column index to local index into lvec
5401 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5402 
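  Example usage (a minimal sketch; A is assumed to be an assembled MATMPIAIJ matrix, and the
  returned objects reference A's internal data, so the caller must not destroy them):
.vb
  Vec        lvec;
  VecScatter Mvctx;
#if defined(PETSC_USE_CTABLE)
  PetscTable colmap;
#else
  PetscInt   *colmap;
#endif
  ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve
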
5403   Level: developer
5404 
5405 @*/
5406 #if defined(PETSC_USE_CTABLE)
5407 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5408 #else
5409 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5410 #endif
5411 {
5412   Mat_MPIAIJ *a;
5413 
5414   PetscFunctionBegin;
5415   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5416   PetscValidPointer(lvec, 2);
5417   PetscValidPointer(colmap, 3);
5418   PetscValidPointer(multScatter, 4);
5419   a = (Mat_MPIAIJ*) A->data;
5420   if (lvec) *lvec = a->lvec;
5421   if (colmap) *colmap = a->colmap;
5422   if (multScatter) *multScatter = a->Mvctx;
5423   PetscFunctionReturn(0);
5424 }
5425 
5426 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5427 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5428 #if defined(PETSC_HAVE_MKL_SPARSE)
5429 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5430 #endif
5431 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5432 #if defined(PETSC_HAVE_ELEMENTAL)
5433 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5434 #endif
5435 #if defined(PETSC_HAVE_HYPRE)
5436 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5437 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5438 #endif
5439 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*);
5440 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5441 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5442 
5443 /*
5444     Computes C = (B'*A')' since computing C = A*B directly for a dense A and a sparse B is not supported
5445 
5446                n                       p                          p
5447         (              )       (              )         (                  )
5448       m (      A       )  *  n (       B      )   =   m (         C        )
5449         (              )       (              )         (                  )
5450 
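    That is, with At = A' (n x m, dense) and Bt = B' (p x n, AIJ), the product reduces to the
    supported (sparse AIJ)*(dense) kernel: Ct = Bt*At is p x m, and C = Ct' is the desired
    m x p result, which is exactly what the code below does.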
5451 */
5452 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5453 {
5454   PetscErrorCode ierr;
5455   Mat            At,Bt,Ct;
5456 
5457   PetscFunctionBegin;
5458   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5459   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5460   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5461   ierr = MatDestroy(&At);CHKERRQ(ierr);
5462   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5463   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5464   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5465   PetscFunctionReturn(0);
5466 }
5467 
5468 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5469 {
5470   PetscErrorCode ierr;
5471   PetscInt       m=A->rmap->n,n=B->cmap->n;
5472   Mat            Cmat;
5473 
5474   PetscFunctionBegin;
5475   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5476   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5477   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5478   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5479   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5480   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5481   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5482   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5483 
5484   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5485 
5486   *C = Cmat;
5487   PetscFunctionReturn(0);
5488 }
5489 
5490 /* ----------------------------------------------------------------*/
5491 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5492 {
5493   PetscErrorCode ierr;
5494 
5495   PetscFunctionBegin;
5496   if (scall == MAT_INITIAL_MATRIX) {
5497     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5498     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5499     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5500   }
5501   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5502   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5503   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5504   PetscFunctionReturn(0);
5505 }
5506 
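/*
   Example usage (a minimal sketch; A is an assembled MATMPIDENSE matrix and B an assembled
   MATMPIAIJ matrix with A->cmap->n == B->rmap->n):

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);

   MatMatMult() reaches MatMatMult_MPIDense_MPIAIJ() above through the
   "MatMatMult_mpidense_mpiaij_C" function composed on the matrix in MatCreate_MPIAIJ() below.
*/
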
5507 /*MC
5508    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5509 
5510    Options Database Keys:
5511 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5512 
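   Example usage (a minimal sketch; the sizes m,n,M,N and the per-row preallocation estimates
   d_nz,o_nz are chosen by the caller):
.vb
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
   ierr = MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
.ve
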
5513   Level: beginner
5514 
5515 .seealso: MatCreateAIJ()
5516 M*/
5517 
5518 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5519 {
5520   Mat_MPIAIJ     *b;
5521   PetscErrorCode ierr;
5522   PetscMPIInt    size;
5523 
5524   PetscFunctionBegin;
5525   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5526 
5527   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5528   B->data       = (void*)b;
5529   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5530   B->assembled  = PETSC_FALSE;
5531   B->insertmode = NOT_SET_VALUES;
5532   b->size       = size;
5533 
5534   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5535 
5536   /* build stash to cache matrix entries destined for other processes */
5537   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5538 
5539   b->donotstash  = PETSC_FALSE;
5540   b->colmap      = 0;
5541   b->garray      = 0;
5542   b->roworiented = PETSC_TRUE;
5543 
5544   /* stuff used for matrix vector multiply */
5545   b->lvec  = NULL;
5546   b->Mvctx = NULL;
5547 
5548   /* stuff for MatGetRow() */
5549   b->rowindices   = 0;
5550   b->rowvalues    = 0;
5551   b->getrowactive = PETSC_FALSE;
5552 
5553   /* flexible pointer used in CUSP/CUSPARSE classes */
5554   b->spptr = NULL;
5555 
5556   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5557   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5558   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5559   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5560   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5561   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5562   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5563   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5564   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5565 #if defined(PETSC_HAVE_MKL_SPARSE)
5566   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5567 #endif
5568   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5569   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5570 #if defined(PETSC_HAVE_ELEMENTAL)
5571   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5572 #endif
5573 #if defined(PETSC_HAVE_HYPRE)
5574   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5575 #endif
5576   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr);
5577   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5578   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5579   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5580   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5581 #if defined(PETSC_HAVE_HYPRE)
5582   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5583 #endif
5584   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
5585   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5586   PetscFunctionReturn(0);
5587 }
5588 
5589 /*@C
5590      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5591          and "off-diagonal" part of the matrix in CSR format.
5592 
5593    Collective on MPI_Comm
5594 
5595    Input Parameters:
5596 +  comm - MPI communicator
5597 .  m - number of local rows (Cannot be PETSC_DECIDE)
5598 .  n - This value should be the same as the local size used in creating the
5599        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5600        calculated if N is given). For square matrices n is almost always m.
5601 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5602 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5603 .   i - row indices for "diagonal" portion of matrix
5604 .   j - column indices for "diagonal" portion, local to this process's diagonal block
5605 .   a - matrix values for "diagonal" portion
5606 .   oi - row indices for "off-diagonal" portion of matrix
5607 .   oj - global column indices for "off-diagonal" portion
5608 -   oa - matrix values for "off-diagonal" portion
5609 
5610    Output Parameter:
5611 .   mat - the matrix
5612 
5613    Level: advanced
5614 
5615    Notes:
5616        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5617        must free the arrays once the matrix has been destroyed and not before.
5618 
5619        The i and j indices are 0 based
5620 
5621        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5622 
5623        This sets local rows and cannot be used to set off-processor values.
5624 
5625        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5626        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5627        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5628        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5629        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5630        communication if it is known that only local entries will be set.
5631 
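   Example usage (a minimal sketch; the caller is assumed to have built the local CSR triples
   i,j,a and oi,oj,oa as described above):
.vb
   Mat A;
   ierr = MatCreateMPIAIJWithSplitArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
   ... use A ...
   ierr = MatDestroy(&A);CHKERRQ(ierr);
.ve
   Only after MatDestroy() may the caller free i, j, a, oi, oj, and oa.
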
5632 .keywords: matrix, aij, compressed row, sparse, parallel
5633 
5634 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5635           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5636 @*/
5637 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5638 {
5639   PetscErrorCode ierr;
5640   Mat_MPIAIJ     *maij;
5641 
5642   PetscFunctionBegin;
5643   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5644   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5645   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5646   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5647   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5648   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5649   maij = (Mat_MPIAIJ*) (*mat)->data;
5650 
5651   (*mat)->preallocated = PETSC_TRUE;
5652 
5653   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5654   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5655 
5656   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5657   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5658 
5659   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5660   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5661   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5662   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5663 
5664   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5665   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5666   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5667   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5668   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5669   PetscFunctionReturn(0);
5670 }
5671 
5672 /*
5673     Special version for direct calls from Fortran
5674 */
5675 #include <petsc/private/fortranimpl.h>
5676 
5677 /* Change these macros so they can be used in a void function */
5678 #undef CHKERRQ
5679 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5680 #undef SETERRQ2
5681 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5682 #undef SETERRQ3
5683 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5684 #undef SETERRQ
5685 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5686 
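/* With the macros redefined above, CHKERRQ() expands to CHKERRABORT(PETSC_COMM_WORLD,ierr), so the
   void Fortran-callable routine below can still check every PETSc call; on an error it aborts
   instead of returning an error code, since a void function has no return value to carry one. */
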
5687 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5688 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5689 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5690 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5691 #else
5692 #endif
5693 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5694 {
5695   Mat            mat  = *mmat;
5696   PetscInt       m    = *mm, n = *mn;
5697   InsertMode     addv = *maddv;
5698   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5699   PetscScalar    value;
5700   PetscErrorCode ierr;
5701 
5702   MatCheckPreallocated(mat,1);
5703   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5704 
5705 #if defined(PETSC_USE_DEBUG)
5706   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5707 #endif
5708   {
5709     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5710     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5711     PetscBool roworiented = aij->roworiented;
5712 
5713     /* Some variables required by the MatSetValues_SeqAIJ_A/B_Private() macros below */
5714     Mat        A                 = aij->A;
5715     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5716     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5717     MatScalar  *aa               = a->a;
5718     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5719     Mat        B                 = aij->B;
5720     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5721     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5722     MatScalar  *ba               = b->a;
5723 
5724     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5725     PetscInt  nonew = a->nonew;
5726     MatScalar *ap1,*ap2;
5727 
5728     PetscFunctionBegin;
5729     for (i=0; i<m; i++) {
5730       if (im[i] < 0) continue;
5731 #if defined(PETSC_USE_DEBUG)
5732       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5733 #endif
5734       if (im[i] >= rstart && im[i] < rend) {
5735         row      = im[i] - rstart;
5736         lastcol1 = -1;
5737         rp1      = aj + ai[row];
5738         ap1      = aa + ai[row];
5739         rmax1    = aimax[row];
5740         nrow1    = ailen[row];
5741         low1     = 0;
5742         high1    = nrow1;
5743         lastcol2 = -1;
5744         rp2      = bj + bi[row];
5745         ap2      = ba + bi[row];
5746         rmax2    = bimax[row];
5747         nrow2    = bilen[row];
5748         low2     = 0;
5749         high2    = nrow2;
5750 
5751         for (j=0; j<n; j++) {
5752           if (roworiented) value = v[i*n+j];
5753           else value = v[i+j*m];
5754           if (in[j] >= cstart && in[j] < cend) {
5755             col = in[j] - cstart;
5756             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5757             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5758           } else if (in[j] < 0) continue;
5759 #if defined(PETSC_USE_DEBUG)
5760           /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
5761           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5762 #endif
5763           else {
5764             if (mat->was_assembled) {
5765               if (!aij->colmap) {
5766                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5767               }
5768 #if defined(PETSC_USE_CTABLE)
5769               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5770               col--;
5771 #else
5772               col = aij->colmap[in[j]] - 1;
5773 #endif
5774               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5775               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5776                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5777                 col  =  in[j];
5778                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5779                 B     = aij->B;
5780                 b     = (Mat_SeqAIJ*)B->data;
5781                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5782                 rp2   = bj + bi[row];
5783                 ap2   = ba + bi[row];
5784                 rmax2 = bimax[row];
5785                 nrow2 = bilen[row];
5786                 low2  = 0;
5787                 high2 = nrow2;
5788                 bm    = aij->B->rmap->n;
5789                 ba    = b->a;
5790               }
5791             } else col = in[j];
5792             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5793           }
5794         }
5795       } else if (!aij->donotstash) {
5796         if (roworiented) {
5797           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5798         } else {
5799           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5800         }
5801       }
5802     }
5803   }
5804   PetscFunctionReturnVoid();
5805 }
5806 
5807