xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 8e35f0b849840f9ce53aec35ecc268130adc8b70)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/isimpl.h>
6 #include <petscblaslapack.h>
7 #include <petscsf.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15    for communicators controlling multiple processes.  It is recommended that you call both of
16    the above preallocation routines for simplicity (see the sketch following this block).
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL. The AIJ type
23    also automatically switches over to use inodes when enough of them exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
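/*
   A minimal usage sketch of the recommendation above (illustrative only; the per-row counts 5 and 2
   are made-up values).  The preallocation call that does not match the communicator size is simply
   ignored, so issuing both keeps the code correct for any number of processes:

     Mat mat;
     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
     ierr = MatSetSizes(mat,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(mat,5,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(mat,5,NULL,2,NULL);CHKERRQ(ierr);
*/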
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
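/*
   A runtime-selection sketch for the option key above; comm, M, N and the executable name are
   placeholders, and the application is assumed to call MatSetFromOptions() on the matrix:

     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
     ierr = MatSetSizes(mat,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(mat);CHKERRQ(ierr);

   then run with:  ./app -mat_type aijcrl
*/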
46 
47 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
48 {
49   PetscErrorCode ierr;
50   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
51 
52   PetscFunctionBegin;
53   if (mat->A) {
54     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
55     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
56   }
57   PetscFunctionReturn(0);
58 }
59 
60 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
61 {
62   PetscErrorCode  ierr;
63   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
64   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
65   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
66   const PetscInt  *ia,*ib;
67   const MatScalar *aa,*bb;
68   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
69   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
70 
71   PetscFunctionBegin;
72   *keptrows = 0;
73   ia        = a->i;
74   ib        = b->i;
75   for (i=0; i<m; i++) {
76     na = ia[i+1] - ia[i];
77     nb = ib[i+1] - ib[i];
78     if (!na && !nb) {
79       cnt++;
80       goto ok1;
81     }
82     aa = a->a + ia[i];
83     for (j=0; j<na; j++) {
84       if (aa[j] != 0.0) goto ok1;
85     }
86     bb = b->a + ib[i];
87     for (j=0; j <nb; j++) {
88       if (bb[j] != 0.0) goto ok1;
89     }
90     cnt++;
91 ok1:;
92   }
93   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
94   if (!n0rows) PetscFunctionReturn(0);
95   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
96   cnt  = 0;
97   for (i=0; i<m; i++) {
98     na = ia[i+1] - ia[i];
99     nb = ib[i+1] - ib[i];
100     if (!na && !nb) continue;
101     aa = a->a + ia[i];
102     for (j=0; j<na;j++) {
103       if (aa[j] != 0.0) {
104         rows[cnt++] = rstart + i;
105         goto ok2;
106       }
107     }
108     bb = b->a + ib[i];
109     for (j=0; j<nb; j++) {
110       if (bb[j] != 0.0) {
111         rows[cnt++] = rstart + i;
112         goto ok2;
113       }
114     }
115 ok2:;
116   }
117   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
118   PetscFunctionReturn(0);
119 }
120 
121 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
122 {
123   PetscErrorCode    ierr;
124   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
125   PetscBool         cong;
126 
127   PetscFunctionBegin;
128   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
129   if (Y->assembled && cong) {
130     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
131   } else {
132     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
133   }
134   PetscFunctionReturn(0);
135 }
136 
137 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
138 {
139   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
140   PetscErrorCode ierr;
141   PetscInt       i,rstart,nrows,*rows;
142 
143   PetscFunctionBegin;
144   *zrows = NULL;
145   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
146   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
147   for (i=0; i<nrows; i++) rows[i] += rstart;
148   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
149   PetscFunctionReturn(0);
150 }
151 
152 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
153 {
154   PetscErrorCode ierr;
155   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
156   PetscInt       i,n,*garray = aij->garray;
157   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
158   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
159   PetscReal      *work;
160 
161   PetscFunctionBegin;
162   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
163   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
164   if (type == NORM_2) {
165     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
166       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
167     }
168     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
169       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
170     }
171   } else if (type == NORM_1) {
172     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
173       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
174     }
175     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
176       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
177     }
178   } else if (type == NORM_INFINITY) {
179     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
180       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
181     }
182     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
183       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
184     }
185 
186   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
187   if (type == NORM_INFINITY) {
188     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
189   } else {
190     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
191   }
192   ierr = PetscFree(work);CHKERRQ(ierr);
193   if (type == NORM_2) {
194     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
195   }
196   PetscFunctionReturn(0);
197 }
198 
199 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
200 {
201   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
202   IS              sis,gis;
203   PetscErrorCode  ierr;
204   const PetscInt  *isis,*igis;
205   PetscInt        n,*iis,nsis,ngis,rstart,i;
206 
207   PetscFunctionBegin;
208   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
209   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
210   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
211   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
212   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
213   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
214 
215   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
216   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
217   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
218   n    = ngis + nsis;
219   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
220   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
221   for (i=0; i<n; i++) iis[i] += rstart;
222   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
223 
224   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
225   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
226   ierr = ISDestroy(&sis);CHKERRQ(ierr);
227   ierr = ISDestroy(&gis);CHKERRQ(ierr);
228   PetscFunctionReturn(0);
229 }
230 
231 /*
232     Distributes a SeqAIJ matrix across a set of processes. Code copied from
233     MatLoad_MPIAIJ(); there is a horrible lack of reuse, and ideally this would be a routine for each matrix type.
234 
235     Only for square matrices.
236 
237     Used by a preconditioner, hence PETSC_EXTERN. A calling sketch follows this comment.
238 */
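/*
   A calling sketch (illustrative only; error checking and assembly of the input are elided).
   Only process 0 needs to supply the sequential matrix gmat; m is the number of rows each
   process is to own in the result.  With MAT_REUSE_MATRIX only the numerical values are moved
   over again from process 0:

     Mat gmat = NULL,dmat;
     if (!rank) { assemble gmat as a square MATSEQAIJ on PETSC_COMM_SELF }
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);
*/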
239 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
240 {
241   PetscMPIInt    rank,size;
242   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
243   PetscErrorCode ierr;
244   Mat            mat;
245   Mat_SeqAIJ     *gmata;
246   PetscMPIInt    tag;
247   MPI_Status     status;
248   PetscBool      aij;
249   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
250 
251   PetscFunctionBegin;
252   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
253   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
254   if (!rank) {
255     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
256     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
257   }
258   if (reuse == MAT_INITIAL_MATRIX) {
259     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
260     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
261     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
262     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
263     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
264     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
265     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
266     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
267     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
268 
269     rowners[0] = 0;
270     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
271     rstart = rowners[rank];
272     rend   = rowners[rank+1];
273     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
274     if (!rank) {
275       gmata = (Mat_SeqAIJ*) gmat->data;
276       /* send row lengths to all processors */
277       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
278       for (i=1; i<size; i++) {
279         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
280       }
281       /* determine the diagonal and off-diagonal counts for each row */
282       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
283       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
284       jj   = 0;
285       for (i=0; i<m; i++) {
286         for (j=0; j<dlens[i]; j++) {
287           if (gmata->j[jj] < rstart) ld[i]++;
288           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
289           jj++;
290         }
291       }
292       /* send column indices to other processes */
293       for (i=1; i<size; i++) {
294         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
295         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
296         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
297       }
298 
299       /* send numerical values to other processes */
300       for (i=1; i<size; i++) {
301         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
302         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
303       }
304       gmataa = gmata->a;
305       gmataj = gmata->j;
306 
307     } else {
308       /* receive row lengths */
309       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
310       /* receive column indices */
311       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
312       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
313       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
314       /* determine the diagonal and off-diagonal counts for each row */
315       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
316       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
317       jj   = 0;
318       for (i=0; i<m; i++) {
319         for (j=0; j<dlens[i]; j++) {
320           if (gmataj[jj] < rstart) ld[i]++;
321           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
322           jj++;
323         }
324       }
325       /* receive numerical values */
326       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
327       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
328     }
329     /* set preallocation */
330     for (i=0; i<m; i++) {
331       dlens[i] -= olens[i];
332     }
333     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
334     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
335 
336     for (i=0; i<m; i++) {
337       dlens[i] += olens[i];
338     }
339     cnt = 0;
340     for (i=0; i<m; i++) {
341       row  = rstart + i;
342       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
343       cnt += dlens[i];
344     }
345     if (rank) {
346       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
347     }
348     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
349     ierr = PetscFree(rowners);CHKERRQ(ierr);
350 
351     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
352 
353     *inmat = mat;
354   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
355     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
356     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
357     mat  = *inmat;
358     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
359     if (!rank) {
360       /* send numerical values to other processes */
361       gmata  = (Mat_SeqAIJ*) gmat->data;
362       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
363       gmataa = gmata->a;
364       for (i=1; i<size; i++) {
365         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
366         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
367       }
368       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
369     } else {
370       /* receive numerical values from process 0 */
371       nz   = Ad->nz + Ao->nz;
372       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
373       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
374     }
375     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
376     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
377     ad = Ad->a;
378     ao = Ao->a;
379     if (mat->rmap->n) {
380       i  = 0;
381       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
382       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
383     }
384     for (i=1; i<mat->rmap->n; i++) {
385       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
386       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
387     }
388     i--;
389     if (mat->rmap->n) {
390       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
391     }
392     if (rank) {
393       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
394     }
395   }
396   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
397   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
398   PetscFunctionReturn(0);
399 }
400 
401 /*
402   Local utility routine that creates a mapping from the global column
403 number to the local number in the off-diagonal part of the local
404 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
405 a slightly higher hash table cost; without it, it is not scalable (each processor
406 holds an order-N integer array) but lookups are fast.
407 */
408 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
409 {
410   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
411   PetscErrorCode ierr;
412   PetscInt       n = aij->B->cmap->n,i;
413 
414   PetscFunctionBegin;
415   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
416 #if defined(PETSC_USE_CTABLE)
417   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
418   for (i=0; i<n; i++) {
419     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
420   }
421 #else
422   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
423   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
424   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
425 #endif
426   PetscFunctionReturn(0);
427 }
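/*
   A lookup sketch for the colmap built above, mirroring its use later in this file: gcol is a
   global column index and col becomes the local off-diagonal column index, or a negative value
   if that column is not present on this process.

     PetscInt col;
   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&col);CHKERRQ(ierr);
     col--;
   #else
     col = aij->colmap[gcol] - 1;
   #endif
*/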
428 
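/*
   Note on the two macros below (added commentary): each inserts (row,col,value) into the local
   diagonal (A) or off-diagonal (B) SeqAIJ block.  A short bisection narrows the search window,
   a linear scan locates the column, and the macro then either updates an existing entry, silently
   drops the value (nonew == 1, or an ignored zero entry), errors out (nonew == -1), or shifts the
   tail of the row up to make room for the new nonzero.
*/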
429 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
430 { \
431     if (col <= lastcol1)  low1 = 0;     \
432     else                 high1 = nrow1; \
433     lastcol1 = col;\
434     while (high1-low1 > 5) { \
435       t = (low1+high1)/2; \
436       if (rp1[t] > col) high1 = t; \
437       else              low1  = t; \
438     } \
439       for (_i=low1; _i<high1; _i++) { \
440         if (rp1[_i] > col) break; \
441         if (rp1[_i] == col) { \
442           if (addv == ADD_VALUES) ap1[_i] += value;   \
443           else                    ap1[_i] = value; \
444           goto a_noinsert; \
445         } \
446       }  \
447       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
448       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
449       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
450       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
451       N = nrow1++ - 1; a->nz++; high1++; \
452       /* shift up all the later entries in this row */ \
453       for (ii=N; ii>=_i; ii--) { \
454         rp1[ii+1] = rp1[ii]; \
455         ap1[ii+1] = ap1[ii]; \
456       } \
457       rp1[_i] = col;  \
458       ap1[_i] = value;  \
459       A->nonzerostate++;\
460       a_noinsert: ; \
461       ailen[row] = nrow1; \
462 }
463 
464 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
465   { \
466     if (col <= lastcol2) low2 = 0;                        \
467     else high2 = nrow2;                                   \
468     lastcol2 = col;                                       \
469     while (high2-low2 > 5) {                              \
470       t = (low2+high2)/2;                                 \
471       if (rp2[t] > col) high2 = t;                        \
472       else             low2  = t;                         \
473     }                                                     \
474     for (_i=low2; _i<high2; _i++) {                       \
475       if (rp2[_i] > col) break;                           \
476       if (rp2[_i] == col) {                               \
477         if (addv == ADD_VALUES) ap2[_i] += value;         \
478         else                    ap2[_i] = value;          \
479         goto b_noinsert;                                  \
480       }                                                   \
481     }                                                     \
482     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
483     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
484     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
485     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
486     N = nrow2++ - 1; b->nz++; high2++;                    \
487     /* shift up all the later entries in this row */      \
488     for (ii=N; ii>=_i; ii--) {                            \
489       rp2[ii+1] = rp2[ii];                                \
490       ap2[ii+1] = ap2[ii];                                \
491     }                                                     \
492     rp2[_i] = col;                                        \
493     ap2[_i] = value;                                      \
494     B->nonzerostate++;                                    \
495     b_noinsert: ;                                         \
496     bilen[row] = nrow2;                                   \
497   }
498 
499 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
500 {
501   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
502   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
503   PetscErrorCode ierr;
504   PetscInt       l,*garray = mat->garray,diag;
505 
506   PetscFunctionBegin;
507   /* code only works for square matrices A */
508 
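  /*
     Added note: v is assumed to hold the complete global row in ascending column order, i.e.
       [ off-diagonal entries left of the diagonal block | diagonal block entries | off-diagonal entries to the right ],
     which is why the three PetscMemcpy() calls below split v at l and at l + (a->i[row+1] - a->i[row]).
  */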
509   /* find size of row to the left of the diagonal part */
510   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
511   row  = row - diag;
512   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
513     if (garray[b->j[b->i[row]+l]] > diag) break;
514   }
515   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
516 
517   /* diagonal part */
518   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
519 
520   /* right of diagonal part */
521   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
522   PetscFunctionReturn(0);
523 }
524 
525 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
526 {
527   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
528   PetscScalar    value;
529   PetscErrorCode ierr;
530   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
531   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
532   PetscBool      roworiented = aij->roworiented;
533 
534   /* Some Variables required in the macro */
535   Mat        A                 = aij->A;
536   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
537   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
538   MatScalar  *aa               = a->a;
539   PetscBool  ignorezeroentries = a->ignorezeroentries;
540   Mat        B                 = aij->B;
541   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
542   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
543   MatScalar  *ba               = b->a;
544 
545   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
546   PetscInt  nonew;
547   MatScalar *ap1,*ap2;
548 
549   PetscFunctionBegin;
550   for (i=0; i<m; i++) {
551     if (im[i] < 0) continue;
552 #if defined(PETSC_USE_DEBUG)
553     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
554 #endif
555     if (im[i] >= rstart && im[i] < rend) {
556       row      = im[i] - rstart;
557       lastcol1 = -1;
558       rp1      = aj + ai[row];
559       ap1      = aa + ai[row];
560       rmax1    = aimax[row];
561       nrow1    = ailen[row];
562       low1     = 0;
563       high1    = nrow1;
564       lastcol2 = -1;
565       rp2      = bj + bi[row];
566       ap2      = ba + bi[row];
567       rmax2    = bimax[row];
568       nrow2    = bilen[row];
569       low2     = 0;
570       high2    = nrow2;
571 
572       for (j=0; j<n; j++) {
573         if (roworiented) value = v[i*n+j];
574         else             value = v[i+j*m];
575         if (in[j] >= cstart && in[j] < cend) {
576           col   = in[j] - cstart;
577           nonew = a->nonew;
578           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
579           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
580         } else if (in[j] < 0) continue;
581 #if defined(PETSC_USE_DEBUG)
582         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
583 #endif
584         else {
585           if (mat->was_assembled) {
586             if (!aij->colmap) {
587               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
588             }
589 #if defined(PETSC_USE_CTABLE)
590             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
591             col--;
592 #else
593             col = aij->colmap[in[j]] - 1;
594 #endif
595             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
596               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
597               col  =  in[j];
598               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
599               B     = aij->B;
600               b     = (Mat_SeqAIJ*)B->data;
601               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
602               rp2   = bj + bi[row];
603               ap2   = ba + bi[row];
604               rmax2 = bimax[row];
605               nrow2 = bilen[row];
606               low2  = 0;
607               high2 = nrow2;
608               bm    = aij->B->rmap->n;
609               ba    = b->a;
610             } else if (col < 0) {
611               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
612                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
613               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
614             }
615           } else col = in[j];
616           nonew = b->nonew;
617           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
618         }
619       }
620     } else {
621       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
622       if (!aij->donotstash) {
623         mat->assembled = PETSC_FALSE;
624         if (roworiented) {
625           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
626         } else {
627           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
628         }
629       }
630     }
631   }
632   PetscFunctionReturn(0);
633 }
634 
635 /*
636     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
637     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like); a small layout example follows this comment.
638     No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
639 */
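/*
   A small layout example (hypothetical data) of the CSR input expected by the two
   CopyFromCSRFormat routines: for a process owning two rows with global columns {0,3} and {1,2},

     mat_i[] = {0,2,4};      row j runs over mat_j[mat_i[j]] .. mat_j[mat_i[j+1]-1]
     mat_j[] = {0,3,1,2};    global column indices, sorted within each row
*/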
640 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
641 {
642   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
643   Mat            A           = aij->A; /* diagonal part of the matrix */
644   Mat            B           = aij->B; /* offdiagonal part of the matrix */
645   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
646   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
647   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
648   PetscInt       *ailen      = a->ilen,*aj = a->j;
649   PetscInt       *bilen      = b->ilen,*bj = b->j;
650   PetscInt       am          = aij->A->rmap->n,j;
651   PetscInt       diag_so_far = 0,dnz;
652   PetscInt       offd_so_far = 0,onz;
653 
654   PetscFunctionBegin;
655   /* Iterate over all rows of the matrix */
656   for (j=0; j<am; j++) {
657     dnz = onz = 0;
658     /*  Iterate over all non-zero columns of the current row */
659     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
660       /* If column is in the diagonal */
661       if (mat_j[col] >= cstart && mat_j[col] < cend) {
662         aj[diag_so_far++] = mat_j[col] - cstart;
663         dnz++;
664       } else { /* off-diagonal entries */
665         bj[offd_so_far++] = mat_j[col];
666         onz++;
667       }
668     }
669     ailen[j] = dnz;
670     bilen[j] = onz;
671   }
672   PetscFunctionReturn(0);
673 }
674 
675 /*
676     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
677     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
678     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
679     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
680     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
681 */
682 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
683 {
684   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
685   Mat            A      = aij->A; /* diagonal part of the matrix */
686   Mat            B      = aij->B; /* offdiagonal part of the matrix */
687   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
688   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
689   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
690   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
691   PetscInt       *ailen = a->ilen,*aj = a->j;
692   PetscInt       *bilen = b->ilen,*bj = b->j;
693   PetscInt       am     = aij->A->rmap->n,j;
694   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
695   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
696   PetscScalar    *aa = a->a,*ba = b->a;
697 
698   PetscFunctionBegin;
699   /* Iterate over all rows of the matrix */
700   for (j=0; j<am; j++) {
701     dnz_row = onz_row = 0;
702     rowstart_offd = full_offd_i[j];
703     rowstart_diag = full_diag_i[j];
704     /*  Iterate over all non-zero columns of the current row */
705     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
706       /* If column is in the diagonal */
707       if (mat_j[col] >= cstart && mat_j[col] < cend) {
708         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
709         aa[rowstart_diag+dnz_row] = mat_a[col];
710         dnz_row++;
711       } else { /* off-diagonal entries */
712         bj[rowstart_offd+onz_row] = mat_j[col];
713         ba[rowstart_offd+onz_row] = mat_a[col];
714         onz_row++;
715       }
716     }
717     ailen[j] = dnz_row;
718     bilen[j] = onz_row;
719   }
720   PetscFunctionReturn(0);
721 }
722 
723 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
724 {
725   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
726   PetscErrorCode ierr;
727   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
728   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
729 
730   PetscFunctionBegin;
731   for (i=0; i<m; i++) {
732     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
733     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
734     if (idxm[i] >= rstart && idxm[i] < rend) {
735       row = idxm[i] - rstart;
736       for (j=0; j<n; j++) {
737         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
738         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
739         if (idxn[j] >= cstart && idxn[j] < cend) {
740           col  = idxn[j] - cstart;
741           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
742         } else {
743           if (!aij->colmap) {
744             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
745           }
746 #if defined(PETSC_USE_CTABLE)
747           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
748           col--;
749 #else
750           col = aij->colmap[idxn[j]] - 1;
751 #endif
752           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
753           else {
754             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
755           }
756         }
757       }
758     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
759   }
760   PetscFunctionReturn(0);
761 }
762 
763 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
764 
765 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
766 {
767   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
768   PetscErrorCode ierr;
769   PetscInt       nstash,reallocs;
770 
771   PetscFunctionBegin;
772   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
773 
774   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
775   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
776   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
777   PetscFunctionReturn(0);
778 }
779 
780 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
781 {
782   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
783   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
784   PetscErrorCode ierr;
785   PetscMPIInt    n;
786   PetscInt       i,j,rstart,ncols,flg;
787   PetscInt       *row,*col;
788   PetscBool      other_disassembled;
789   PetscScalar    *val;
790 
791   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
792 
793   PetscFunctionBegin;
794   if (!aij->donotstash && !mat->nooffprocentries) {
795     while (1) {
796       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
797       if (!flg) break;
798 
799       for (i=0; i<n; ) {
800         /* Now identify the consecutive vals belonging to the same row */
801         for (j=i,rstart=row[j]; j<n; j++) {
802           if (row[j] != rstart) break;
803         }
804         if (j < n) ncols = j-i;
805         else       ncols = n-i;
806         /* Now assemble all these values with a single function call */
807         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
808 
809         i = j;
810       }
811     }
812     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
813   }
814   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
815   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
816 
817   /* determine if any processor has disassembled, if so we must
818      also disassemble ourselves, in order that we may reassemble. */
819   /*
820      if nonzero structure of submatrix B cannot change then we know that
821      no processor disassembled thus we can skip this stuff
822   */
823   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
824     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
825     if (mat->was_assembled && !other_disassembled) {
826       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
827     }
828   }
829   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
830     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
831   }
832   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
833   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
834   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
835 
836   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
837 
838   aij->rowvalues = 0;
839 
840   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
841   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
842 
843   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
844   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
845     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
846     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
847   }
848   PetscFunctionReturn(0);
849 }
850 
851 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
852 {
853   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
854   PetscErrorCode ierr;
855 
856   PetscFunctionBegin;
857   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
858   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
859   PetscFunctionReturn(0);
860 }
861 
862 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
863 {
864   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
865   PetscInt      *lrows;
866   PetscInt       r, len;
867   PetscBool      cong;
868   PetscErrorCode ierr;
869 
870   PetscFunctionBegin;
871   /* get locally owned rows */
872   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
873   /* fix right hand side if needed */
874   if (x && b) {
875     const PetscScalar *xx;
876     PetscScalar       *bb;
877 
878     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
879     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
880     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
881     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
882     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
883   }
884   /* Must zero mat->B before mat->A because the (diag) case below may put values into mat->B */
885   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
886   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
887   if ((diag != 0.0) && cong) {
888     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
889   } else if (diag != 0.0) {
890     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
891     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
892     for (r = 0; r < len; ++r) {
893       const PetscInt row = lrows[r] + A->rmap->rstart;
894       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
895     }
896     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
897     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
898   } else {
899     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
900   }
901   ierr = PetscFree(lrows);CHKERRQ(ierr);
902 
903   /* only change matrix nonzero state if pattern was allowed to be changed */
904   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
905     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
906     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
907   }
908   PetscFunctionReturn(0);
909 }
910 
911 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
912 {
913   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
914   PetscErrorCode    ierr;
915   PetscMPIInt       n = A->rmap->n;
916   PetscInt          i,j,r,m,p = 0,len = 0;
917   PetscInt          *lrows,*owners = A->rmap->range;
918   PetscSFNode       *rrows;
919   PetscSF           sf;
920   const PetscScalar *xx;
921   PetscScalar       *bb,*mask;
922   Vec               xmask,lmask;
923   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
924   const PetscInt    *aj, *ii,*ridx;
925   PetscScalar       *aa;
926 
927   PetscFunctionBegin;
928   /* Create SF where leaves are input rows and roots are owned rows */
929   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
930   for (r = 0; r < n; ++r) lrows[r] = -1;
931   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
932   for (r = 0; r < N; ++r) {
933     const PetscInt idx   = rows[r];
934     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
935     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
936       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
937     }
938     rrows[r].rank  = p;
939     rrows[r].index = rows[r] - owners[p];
940   }
941   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
942   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
943   /* Collect flags for rows to be zeroed */
944   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
945   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
946   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
947   /* Compress and put in row numbers */
948   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
949   /* zero diagonal part of matrix */
950   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
951   /* handle off diagonal part of matrix */
952   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
953   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
954   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
955   for (i=0; i<len; i++) bb[lrows[i]] = 1;
956   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
957   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
958   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
959   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
960   if (x) {
961     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
962     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
963     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
964     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
965   }
966   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
967   /* remove zeroed rows of off diagonal matrix */
968   ii = aij->i;
969   for (i=0; i<len; i++) {
970     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
971   }
972   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
973   if (aij->compressedrow.use) {
974     m    = aij->compressedrow.nrows;
975     ii   = aij->compressedrow.i;
976     ridx = aij->compressedrow.rindex;
977     for (i=0; i<m; i++) {
978       n  = ii[i+1] - ii[i];
979       aj = aij->j + ii[i];
980       aa = aij->a + ii[i];
981 
982       for (j=0; j<n; j++) {
983         if (PetscAbsScalar(mask[*aj])) {
984           if (b) bb[*ridx] -= *aa*xx[*aj];
985           *aa = 0.0;
986         }
987         aa++;
988         aj++;
989       }
990       ridx++;
991     }
992   } else { /* do not use compressed row format */
993     m = l->B->rmap->n;
994     for (i=0; i<m; i++) {
995       n  = ii[i+1] - ii[i];
996       aj = aij->j + ii[i];
997       aa = aij->a + ii[i];
998       for (j=0; j<n; j++) {
999         if (PetscAbsScalar(mask[*aj])) {
1000           if (b) bb[i] -= *aa*xx[*aj];
1001           *aa = 0.0;
1002         }
1003         aa++;
1004         aj++;
1005       }
1006     }
1007   }
1008   if (x) {
1009     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1010     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1011   }
1012   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1013   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1014   ierr = PetscFree(lrows);CHKERRQ(ierr);
1015 
1016   /* only change matrix nonzero state if pattern was allowed to be changed */
1017   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1018     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1019     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1020   }
1021   PetscFunctionReturn(0);
1022 }
1023 
1024 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1025 {
1026   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1027   PetscErrorCode ierr;
1028   PetscInt       nt;
1029   VecScatter     Mvctx = a->Mvctx;
1030 
1031   PetscFunctionBegin;
1032   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1033   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1034 
1035   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1036   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1037   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1038   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1039   PetscFunctionReturn(0);
1040 }
1041 
1042 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1043 {
1044   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1045   PetscErrorCode ierr;
1046 
1047   PetscFunctionBegin;
1048   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1049   PetscFunctionReturn(0);
1050 }
1051 
1052 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1053 {
1054   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1055   PetscErrorCode ierr;
1056   VecScatter     Mvctx = a->Mvctx;
1057 
1058   PetscFunctionBegin;
1059   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1060   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1061   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1062   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1063   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1064   PetscFunctionReturn(0);
1065 }
1066 
1067 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1068 {
1069   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1070   PetscErrorCode ierr;
1071   PetscBool      merged;
1072 
1073   PetscFunctionBegin;
1074   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1075   /* do nondiagonal part */
1076   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1077   if (!merged) {
1078     /* send it on its way */
1079     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1080     /* do local part */
1081     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1082     /* receive remote parts: note this assumes the values are not actually */
1083     /* added in yy until the next line */
1084     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1085   } else {
1086     /* do local part */
1087     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1088     /* send it on its way */
1089     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1090     /* values actually were received in the Begin() but we need to call this nop */
1091     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1092   }
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1097 {
1098   MPI_Comm       comm;
1099   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1100   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1101   IS             Me,Notme;
1102   PetscErrorCode ierr;
1103   PetscInt       M,N,first,last,*notme,i;
1104   PetscBool      lf;
1105   PetscMPIInt    size;
1106 
1107   PetscFunctionBegin;
1108   /* Easy test: symmetric diagonal block */
1109   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1110   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1111   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
1112   if (!*f) PetscFunctionReturn(0);
1113   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1114   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1115   if (size == 1) PetscFunctionReturn(0);
1116 
1117   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1118   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1119   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1120   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1121   for (i=0; i<first; i++) notme[i] = i;
1122   for (i=last; i<M; i++) notme[i-last+first] = i;
1123   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1124   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1125   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1126   Aoff = Aoffs[0];
1127   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1128   Boff = Boffs[0];
1129   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1130   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1131   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1132   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1133   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1134   ierr = PetscFree(notme);CHKERRQ(ierr);
1135   PetscFunctionReturn(0);
1136 }
1137 
1138 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1139 {
1140   PetscErrorCode ierr;
1141 
1142   PetscFunctionBegin;
1143   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1144   PetscFunctionReturn(0);
1145 }
1146 
1147 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1148 {
1149   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1150   PetscErrorCode ierr;
1151 
1152   PetscFunctionBegin;
1153   /* do nondiagonal part */
1154   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1155   /* send it on its way */
1156   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1157   /* do local part */
1158   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1159   /* receive remote parts */
1160   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1161   PetscFunctionReturn(0);
1162 }
1163 
1164 /*
1165   This only works correctly for square matrices where the subblock A->A is the
1166    diagonal block
1167 */
1168 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1169 {
1170   PetscErrorCode ierr;
1171   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1172 
1173   PetscFunctionBegin;
1174   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1175   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1176   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1177   PetscFunctionReturn(0);
1178 }
1179 
1180 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1181 {
1182   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1183   PetscErrorCode ierr;
1184 
1185   PetscFunctionBegin;
1186   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1187   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1188   PetscFunctionReturn(0);
1189 }
1190 
1191 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1192 {
1193   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1194   PetscErrorCode ierr;
1195 
1196   PetscFunctionBegin;
1197 #if defined(PETSC_USE_LOG)
1198   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1199 #endif
1200   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1201   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1202   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1203   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1204 #if defined(PETSC_USE_CTABLE)
1205   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1206 #else
1207   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1208 #endif
1209   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1210   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1211   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1212   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1213   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1214   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1215   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1216 
1217   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1218   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1219   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1220   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1221   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1222   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1223   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1224   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1225   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1226 #if defined(PETSC_HAVE_ELEMENTAL)
1227   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1228 #endif
1229 #if defined(PETSC_HAVE_HYPRE)
1230   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1231   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1232 #endif
1233   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1234   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
1235   PetscFunctionReturn(0);
1236 }
1237 
1238 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1239 {
1240   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1241   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1242   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1243   PetscErrorCode ierr;
1244   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1245   int            fd;
1246   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1247   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1248   PetscScalar    *column_values;
1249   PetscInt       message_count,flowcontrolcount;
1250   FILE           *file;
1251 
1252   PetscFunctionBegin;
1253   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1254   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1255   nz   = A->nz + B->nz;
1256   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1257   if (!rank) {
1258     header[0] = MAT_FILE_CLASSID;
1259     header[1] = mat->rmap->N;
1260     header[2] = mat->cmap->N;
1261 
1262     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1263     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1264     /* get largest number of rows any processor has */
1265     rlen  = mat->rmap->n;
1266     range = mat->rmap->range;
1267     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1268   } else {
1269     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1270     rlen = mat->rmap->n;
1271   }
1272 
1273   /* load up the local row counts */
1274   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1275   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1276 
1277   /* store the row lengths to the file */
1278   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1279   if (!rank) {
1280     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1281     for (i=1; i<size; i++) {
1282       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1283       rlen = range[i+1] - range[i];
1284       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1285       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1286     }
1287     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1288   } else {
1289     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1290     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1291     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1292   }
1293   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1294 
1295   /* load up the local column indices */
1296   nzmax = nz; /* this processor needs space as large as the largest processor needs */
1297   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1298   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1299   cnt   = 0;
1300   for (i=0; i<mat->rmap->n; i++) {
1301     for (j=B->i[i]; j<B->i[i+1]; j++) {
1302       if ((col = garray[B->j[j]]) > cstart) break;
1303       column_indices[cnt++] = col;
1304     }
1305     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1306     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1307   }
1308   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1309 
1310   /* store the column indices to the file */
1311   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1312   if (!rank) {
1313     MPI_Status status;
1314     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1315     for (i=1; i<size; i++) {
1316       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1317       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1318       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1319       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1320       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1321     }
1322     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1323   } else {
1324     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1325     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1326     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1327     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1328   }
1329   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1330 
1331   /* load up the local column values */
1332   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1333   cnt  = 0;
1334   for (i=0; i<mat->rmap->n; i++) {
1335     for (j=B->i[i]; j<B->i[i+1]; j++) {
1336       if (garray[B->j[j]] > cstart) break;
1337       column_values[cnt++] = B->a[j];
1338     }
1339     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1340     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1341   }
1342   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1343 
1344   /* store the column values to the file */
1345   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1346   if (!rank) {
1347     MPI_Status status;
1348     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1349     for (i=1; i<size; i++) {
1350       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1351       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1352       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1353       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1354       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1355     }
1356     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1357   } else {
1358     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1359     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1360     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1361     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1362   }
1363   ierr = PetscFree(column_values);CHKERRQ(ierr);
1364 
1365   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1366   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1367   PetscFunctionReturn(0);
1368 }
1369 
1370 #include <petscdraw.h>
1371 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1372 {
1373   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1374   PetscErrorCode    ierr;
1375   PetscMPIInt       rank = aij->rank,size = aij->size;
1376   PetscBool         isdraw,iascii,isbinary;
1377   PetscViewer       sviewer;
1378   PetscViewerFormat format;
1379 
1380   PetscFunctionBegin;
1381   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1382   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1383   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1384   if (iascii) {
1385     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1386     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1387       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1388       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1389       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1390       for (i=0; i<(PetscInt)size; i++) {
1391         nmax = PetscMax(nmax,nz[i]);
1392         nmin = PetscMin(nmin,nz[i]);
1393         navg += nz[i];
1394       }
1395       ierr = PetscFree(nz);CHKERRQ(ierr);
1396       navg = navg/size;
1397       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1398       PetscFunctionReturn(0);
1399     }
1400     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1401     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1402       MatInfo   info;
1403       PetscBool inodes;
1404 
1405       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1406       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1407       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1408       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1409       if (!inodes) {
1410         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1411                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1412       } else {
1413         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1414                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1415       }
1416       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1417       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1418       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1419       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1420       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1421       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1422       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1423       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1424       PetscFunctionReturn(0);
1425     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1426       PetscInt inodecount,inodelimit,*inodes;
1427       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1428       if (inodes) {
1429         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1430       } else {
1431         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1432       }
1433       PetscFunctionReturn(0);
1434     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1435       PetscFunctionReturn(0);
1436     }
1437   } else if (isbinary) {
1438     if (size == 1) {
1439       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1440       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1441     } else {
1442       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1443     }
1444     PetscFunctionReturn(0);
1445   } else if (isdraw) {
1446     PetscDraw draw;
1447     PetscBool isnull;
1448     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1449     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1450     if (isnull) PetscFunctionReturn(0);
1451   }
1452 
1453   {
1454     /* assemble the entire matrix onto first processor. */
1455     Mat        A;
1456     Mat_SeqAIJ *Aloc;
1457     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1458     MatScalar  *a;
1459 
1460     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1461     if (!rank) {
1462       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1463     } else {
1464       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1465     }
1466     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1467     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1468     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1469     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1470     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1471 
1472     /* copy over the A part */
1473     Aloc = (Mat_SeqAIJ*)aij->A->data;
1474     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1475     row  = mat->rmap->rstart;
1476     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1477     for (i=0; i<m; i++) {
1478       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1479       row++;
1480       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1481     }
1482     aj = Aloc->j;
1483     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1484 
1485     /* copy over the B part */
1486     Aloc = (Mat_SeqAIJ*)aij->B->data;
1487     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1488     row  = mat->rmap->rstart;
1489     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1490     ct   = cols;
1491     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1492     for (i=0; i<m; i++) {
1493       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1494       row++;
1495       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1496     }
1497     ierr = PetscFree(ct);CHKERRQ(ierr);
1498     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1499     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1500     /*
1501        Everyone has to participate in drawing the matrix since the graphics waits are
1502        synchronized across all processes that share the PetscDraw object
1503     */
1504     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1505     if (!rank) {
1506       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1507       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1508     }
1509     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1510     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1511     ierr = MatDestroy(&A);CHKERRQ(ierr);
1512   }
1513   PetscFunctionReturn(0);
1514 }
1515 
1516 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1517 {
1518   PetscErrorCode ierr;
1519   PetscBool      iascii,isdraw,issocket,isbinary;
1520 
1521   PetscFunctionBegin;
1522   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1523   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1524   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1525   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1526   if (iascii || isdraw || isbinary || issocket) {
1527     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1528   }
1529   PetscFunctionReturn(0);
1530 }
1531 
1532 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1533 {
1534   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1535   PetscErrorCode ierr;
1536   Vec            bb1 = 0;
1537   PetscBool      hasop;
1538 
1539   PetscFunctionBegin;
1540   if (flag == SOR_APPLY_UPPER) {
1541     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1542     PetscFunctionReturn(0);
1543   }
1544 
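  /* the work vector bb1 is needed when more than one iteration is requested, when the initial guess is nonzero
     (~flag & SOR_ZERO_INITIAL_GUESS tests that the zero-initial-guess bit is NOT set), or for Eisenstat */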
1545   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1546     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1547   }
1548 
1549   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1550     if (flag & SOR_ZERO_INITIAL_GUESS) {
1551       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1552       its--;
1553     }
1554 
1555     while (its--) {
1556       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1557       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1558 
1559       /* update rhs: bb1 = bb - B*x */
1560       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1561       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1562 
1563       /* local sweep */
1564       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1565     }
1566   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1567     if (flag & SOR_ZERO_INITIAL_GUESS) {
1568       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1569       its--;
1570     }
1571     while (its--) {
1572       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1573       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1574 
1575       /* update rhs: bb1 = bb - B*x */
1576       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1577       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1578 
1579       /* local sweep */
1580       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1581     }
1582   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1583     if (flag & SOR_ZERO_INITIAL_GUESS) {
1584       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1585       its--;
1586     }
1587     while (its--) {
1588       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1589       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1590 
1591       /* update rhs: bb1 = bb - B*x */
1592       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1593       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1594 
1595       /* local sweep */
1596       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1597     }
1598   } else if (flag & SOR_EISENSTAT) {
1599     Vec xx1;
1600 
1601     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1602     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1603 
1604     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1605     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1606     if (!mat->diag) {
1607       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1608       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1609     }
1610     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1611     if (hasop) {
1612       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1613     } else {
1614       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1615     }
1616     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1617 
1618     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1619 
1620     /* local sweep */
1621     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1622     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1623     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1624   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1625 
1626   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1627 
1628   matin->factorerrortype = mat->A->factorerrortype;
1629   PetscFunctionReturn(0);
1630 }
1631 
1632 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1633 {
1634   Mat            aA,aB,Aperm;
1635   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1636   PetscScalar    *aa,*ba;
1637   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1638   PetscSF        rowsf,sf;
1639   IS             parcolp = NULL;
1640   PetscBool      done;
1641   PetscErrorCode ierr;
1642 
1643   PetscFunctionBegin;
1644   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1645   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1646   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1647   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1648 
1649   /* Invert row permutation to find out where my rows should go */
1650   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1651   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1652   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1653   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1654   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1655   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1656 
1657   /* Invert column permutation to find out where my columns should go */
1658   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1659   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1660   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1661   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1662   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1663   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1664   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1665 
1666   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1667   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1668   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1669 
1670   /* Find out where my gcols should go */
1671   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1672   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1673   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1674   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1675   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1676   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1677   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1678   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1679 
1680   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1681   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1682   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1683   for (i=0; i<m; i++) {
1684     PetscInt row = rdest[i],rowner;
1685     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1686     for (j=ai[i]; j<ai[i+1]; j++) {
1687       PetscInt cowner,col = cdest[aj[j]];
1688       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1689       if (rowner == cowner) dnnz[i]++;
1690       else onnz[i]++;
1691     }
1692     for (j=bi[i]; j<bi[i+1]; j++) {
1693       PetscInt cowner,col = gcdest[bj[j]];
1694       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1695       if (rowner == cowner) dnnz[i]++;
1696       else onnz[i]++;
1697     }
1698   }
1699   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1700   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1701   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1702   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1703   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1704 
1705   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1706   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1707   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1708   for (i=0; i<m; i++) {
1709     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1710     PetscInt j0,rowlen;
1711     rowlen = ai[i+1] - ai[i];
1712     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1713       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1714       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1715     }
1716     rowlen = bi[i+1] - bi[i];
1717     for (j0=j=0; j<rowlen; j0=j) {
1718       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1719       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1720     }
1721   }
1722   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1723   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1724   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1725   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1726   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1727   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1728   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1729   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1730   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1731   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1732   *B = Aperm;
1733   PetscFunctionReturn(0);
1734 }
1735 
1736 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1737 {
1738   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1739   PetscErrorCode ierr;
1740 
1741   PetscFunctionBegin;
1742   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1743   if (ghosts) *ghosts = aij->garray;
1744   PetscFunctionReturn(0);
1745 }
1746 
1747 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1748 {
1749   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1750   Mat            A    = mat->A,B = mat->B;
1751   PetscErrorCode ierr;
1752   PetscReal      isend[5],irecv[5];
1753 
1754   PetscFunctionBegin;
1755   info->block_size = 1.0;
1756   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1757 
1758   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1759   isend[3] = info->memory;  isend[4] = info->mallocs;
1760 
1761   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1762 
1763   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1764   isend[3] += info->memory;  isend[4] += info->mallocs;
1765   if (flag == MAT_LOCAL) {
1766     info->nz_used      = isend[0];
1767     info->nz_allocated = isend[1];
1768     info->nz_unneeded  = isend[2];
1769     info->memory       = isend[3];
1770     info->mallocs      = isend[4];
1771   } else if (flag == MAT_GLOBAL_MAX) {
1772     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1773 
1774     info->nz_used      = irecv[0];
1775     info->nz_allocated = irecv[1];
1776     info->nz_unneeded  = irecv[2];
1777     info->memory       = irecv[3];
1778     info->mallocs      = irecv[4];
1779   } else if (flag == MAT_GLOBAL_SUM) {
1780     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1781 
1782     info->nz_used      = irecv[0];
1783     info->nz_allocated = irecv[1];
1784     info->nz_unneeded  = irecv[2];
1785     info->memory       = irecv[3];
1786     info->mallocs      = irecv[4];
1787   }
1788   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1789   info->fill_ratio_needed = 0;
1790   info->factor_mallocs    = 0;
1791   PetscFunctionReturn(0);
1792 }
1793 
1794 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1795 {
1796   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1797   PetscErrorCode ierr;
1798 
1799   PetscFunctionBegin;
1800   switch (op) {
1801   case MAT_NEW_NONZERO_LOCATIONS:
1802   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1803   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1804   case MAT_KEEP_NONZERO_PATTERN:
1805   case MAT_NEW_NONZERO_LOCATION_ERR:
1806   case MAT_USE_INODES:
1807   case MAT_IGNORE_ZERO_ENTRIES:
1808     MatCheckPreallocated(A,1);
1809     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1810     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1811     break;
1812   case MAT_ROW_ORIENTED:
1813     MatCheckPreallocated(A,1);
1814     a->roworiented = flg;
1815 
1816     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1817     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1818     break;
1819   case MAT_NEW_DIAGONALS:
1820     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1821     break;
1822   case MAT_IGNORE_OFF_PROC_ENTRIES:
1823     a->donotstash = flg;
1824     break;
1825   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1826   case MAT_SPD:
1827   case MAT_SYMMETRIC:
1828   case MAT_STRUCTURALLY_SYMMETRIC:
1829   case MAT_HERMITIAN:
1830   case MAT_SYMMETRY_ETERNAL:
1831     break;
1832   case MAT_SUBMAT_SINGLEIS:
1833     A->submat_singleis = flg;
1834     break;
1835   case MAT_STRUCTURE_ONLY:
1836     /* The option is handled directly by MatSetOption() */
1837     break;
1838   default:
1839     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1840   }
1841   PetscFunctionReturn(0);
1842 }
1843 
1844 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1845 {
1846   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1847   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1848   PetscErrorCode ierr;
1849   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1850   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1851   PetscInt       *cmap,*idx_p;
1852 
1853   PetscFunctionBegin;
1854   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1855   mat->getrowactive = PETSC_TRUE;
1856 
1857   if (!mat->rowvalues && (idx || v)) {
1858     /*
1859         allocate enough space to hold information from the longest row.
1860     */
1861     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1862     PetscInt   max = 1,tmp;
1863     for (i=0; i<matin->rmap->n; i++) {
1864       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1865       if (max < tmp) max = tmp;
1866     }
1867     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1868   }
1869 
1870   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1871   lrow = row - rstart;
1872 
1873   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1874   if (!v)   {pvA = 0; pvB = 0;}
1875   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1876   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1877   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1878   nztot = nzA + nzB;
1879 
1880   cmap = mat->garray;
1881   if (v  || idx) {
1882     if (nztot) {
1883       /* Sort by increasing column numbers, assuming A and B already sorted */
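      /* merge order: off-diagonal (B) entries whose global column is below cstart, then the diagonal block (A)
         with its columns shifted by cstart, then the remaining off-diagonal entries */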
1884       PetscInt imark = -1;
1885       if (v) {
1886         *v = v_p = mat->rowvalues;
1887         for (i=0; i<nzB; i++) {
1888           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1889           else break;
1890         }
1891         imark = i;
1892         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1893         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1894       }
1895       if (idx) {
1896         *idx = idx_p = mat->rowindices;
1897         if (imark > -1) {
1898           for (i=0; i<imark; i++) {
1899             idx_p[i] = cmap[cworkB[i]];
1900           }
1901         } else {
1902           for (i=0; i<nzB; i++) {
1903             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1904             else break;
1905           }
1906           imark = i;
1907         }
1908         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1909         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1910       }
1911     } else {
1912       if (idx) *idx = 0;
1913       if (v)   *v   = 0;
1914     }
1915   }
1916   *nz  = nztot;
1917   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1918   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1919   PetscFunctionReturn(0);
1920 }
1921 
1922 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1923 {
1924   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1925 
1926   PetscFunctionBegin;
1927   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1928   aij->getrowactive = PETSC_FALSE;
1929   PetscFunctionReturn(0);
1930 }
1931 
1932 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1933 {
1934   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1935   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1936   PetscErrorCode ierr;
1937   PetscInt       i,j,cstart = mat->cmap->rstart;
1938   PetscReal      sum = 0.0;
1939   MatScalar      *v;
1940 
1941   PetscFunctionBegin;
1942   if (aij->size == 1) {
1943     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1944   } else {
1945     if (type == NORM_FROBENIUS) {
1946       v = amat->a;
1947       for (i=0; i<amat->nz; i++) {
1948         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1949       }
1950       v = bmat->a;
1951       for (i=0; i<bmat->nz; i++) {
1952         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1953       }
1954       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1955       *norm = PetscSqrtReal(*norm);
1956       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1957     } else if (type == NORM_1) { /* max column norm */
1958       PetscReal *tmp,*tmp2;
1959       PetscInt  *jj,*garray = aij->garray;
1960       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1961       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1962       *norm = 0.0;
1963       v     = amat->a; jj = amat->j;
1964       for (j=0; j<amat->nz; j++) {
1965         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1966       }
1967       v = bmat->a; jj = bmat->j;
1968       for (j=0; j<bmat->nz; j++) {
1969         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1970       }
1971       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1972       for (j=0; j<mat->cmap->N; j++) {
1973         if (tmp2[j] > *norm) *norm = tmp2[j];
1974       }
1975       ierr = PetscFree(tmp);CHKERRQ(ierr);
1976       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1977       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1978     } else if (type == NORM_INFINITY) { /* max row norm */
1979       PetscReal ntemp = 0.0;
1980       for (j=0; j<aij->A->rmap->n; j++) {
1981         v   = amat->a + amat->i[j];
1982         sum = 0.0;
1983         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1984           sum += PetscAbsScalar(*v); v++;
1985         }
1986         v = bmat->a + bmat->i[j];
1987         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1988           sum += PetscAbsScalar(*v); v++;
1989         }
1990         if (sum > ntemp) ntemp = sum;
1991       }
1992       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1993       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1994     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1995   }
1996   PetscFunctionReturn(0);
1997 }
1998 
1999 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2000 {
2001   Mat_MPIAIJ     *a    =(Mat_MPIAIJ*)A->data,*b;
2002   Mat_SeqAIJ     *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
2003   PetscInt       M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,*B_diag_ilen,*B_diag_i,i,ncol,A_diag_ncol;
2004   PetscErrorCode ierr;
2005   Mat            B,A_diag,*B_diag;
2006   MatScalar      *array;
2007 
2008   PetscFunctionBegin;
2009   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2010   ai = Aloc->i; aj = Aloc->j;
2011   bi = Bloc->i; bj = Bloc->j;
2012   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2013     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2014     PetscSFNode          *oloc;
2015     PETSC_UNUSED PetscSF sf;
2016 
2017     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2018     /* compute d_nnz for preallocation */
2019     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2020     for (i=0; i<ai[ma]; i++) {
2021       d_nnz[aj[i]]++;
2022     }
2023     /* compute local off-diagonal contributions */
2024     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2025     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2026     /* map those to global */
2027     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2028     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2029     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2030     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2031     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2032     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2033     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2034 
2035     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2036     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2037     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2038     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2039     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2040     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2041   } else {
2042     B    = *matout;
2043     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2044   }
2045 
2046   b           = (Mat_MPIAIJ*)B->data;
2047   A_diag      = a->A;
2048   B_diag      = &b->A;
2049   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2050   A_diag_ncol = A_diag->cmap->N;
2051   B_diag_ilen = sub_B_diag->ilen;
2052   B_diag_i    = sub_B_diag->i;
2053 
2054   /* Set ilen for diagonal of B */
2055   for (i=0; i<A_diag_ncol; i++) {
2056     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2057   }
2058 
2059   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
2060   very quickly (i.e., without using MatSetValues), because all writes are local. */
2061   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2062 
2063   /* copy over the B part */
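  /* each local row of Bloc becomes a column of the transpose: MatSetValues() below inserts ncol row entries
     (the global columns of A mapped through garray) into the single column given by the current global row of A */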
2064   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2065   array = Bloc->a;
2066   row   = A->rmap->rstart;
2067   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2068   cols_tmp = cols;
2069   for (i=0; i<mb; i++) {
2070     ncol = bi[i+1]-bi[i];
2071     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2072     row++;
2073     array += ncol; cols_tmp += ncol;
2074   }
2075   ierr = PetscFree(cols);CHKERRQ(ierr);
2076 
2077   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2078   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2079   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2080     *matout = B;
2081   } else {
2082     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2083   }
2084   PetscFunctionReturn(0);
2085 }
2086 
2087 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2088 {
2089   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2090   Mat            a    = aij->A,b = aij->B;
2091   PetscErrorCode ierr;
2092   PetscInt       s1,s2,s3;
2093 
2094   PetscFunctionBegin;
2095   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2096   if (rr) {
2097     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2098     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2099     /* Overlap communication with computation. */
2100     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2101   }
2102   if (ll) {
2103     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2104     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2105     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2106   }
2107   /* scale the diagonal block */
2108   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2109 
2110   if (rr) {
2111     /* Do a scatter end and then right scale the off-diagonal block */
2112     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2113     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2114   }
2115   PetscFunctionReturn(0);
2116 }
2117 
2118 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2119 {
2120   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2121   PetscErrorCode ierr;
2122 
2123   PetscFunctionBegin;
2124   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2125   PetscFunctionReturn(0);
2126 }
2127 
2128 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2129 {
2130   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2131   Mat            a,b,c,d;
2132   PetscBool      flg;
2133   PetscErrorCode ierr;
2134 
2135   PetscFunctionBegin;
2136   a = matA->A; b = matA->B;
2137   c = matB->A; d = matB->B;
2138 
2139   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2140   if (flg) {
2141     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2142   }
2143   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2144   PetscFunctionReturn(0);
2145 }
2146 
2147 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2148 {
2149   PetscErrorCode ierr;
2150   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2151   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2152 
2153   PetscFunctionBegin;
2154   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2155   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2156     /* because of the column compression in the off-processor part of the matrix a->B,
2157     /* because of the column compression in the off-processor part of the matrix a->B,
2158        the number of columns in a->B and b->B may be different, hence we cannot call
2159        MatCopy() directly on the two parts. If need be, we could provide a more
2160        efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
2161        and then copying the submatrices */
2162   } else {
2163     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2164     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2165   }
2166   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2167   PetscFunctionReturn(0);
2168 }
2169 
2170 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2171 {
2172   PetscErrorCode ierr;
2173 
2174   PetscFunctionBegin;
2175   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2176   PetscFunctionReturn(0);
2177 }
2178 
2179 /*
2180    Computes the number of nonzeros per row needed for preallocation when X and Y
2181    have different nonzero structure.
2182 */
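/*
   For example (a hypothetical row): if row i of X has global columns {1,4,7} and row i of Y has
   global columns {2,4,9}, the merged row has columns {1,2,4,7,9}, so nnz[i] = 5 (the shared
   column 4 is counted only once).
*/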
2183 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2184 {
2185   PetscInt       i,j,k,nzx,nzy;
2186 
2187   PetscFunctionBegin;
2188   /* Set the number of nonzeros in the new matrix */
2189   for (i=0; i<m; i++) {
2190     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2191     nzx = xi[i+1] - xi[i];
2192     nzy = yi[i+1] - yi[i];
2193     nnz[i] = 0;
2194     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2195       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2196       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2197       nnz[i]++;
2198     }
2199     for (; k<nzy; k++) nnz[i]++;
2200   }
2201   PetscFunctionReturn(0);
2202 }
2203 
2204 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2205 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2206 {
2207   PetscErrorCode ierr;
2208   PetscInt       m = Y->rmap->N;
2209   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2210   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2211 
2212   PetscFunctionBegin;
2213   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2214   PetscFunctionReturn(0);
2215 }
2216 
2217 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2218 {
2219   PetscErrorCode ierr;
2220   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2221   PetscBLASInt   bnz,one=1;
2222   Mat_SeqAIJ     *x,*y;
2223 
2224   PetscFunctionBegin;
2225   if (str == SAME_NONZERO_PATTERN) {
2226     PetscScalar alpha = a;
2227     x    = (Mat_SeqAIJ*)xx->A->data;
2228     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2229     y    = (Mat_SeqAIJ*)yy->A->data;
2230     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2231     x    = (Mat_SeqAIJ*)xx->B->data;
2232     y    = (Mat_SeqAIJ*)yy->B->data;
2233     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2234     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2235     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2236   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2237     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2238   } else {
2239     Mat      B;
2240     PetscInt *nnz_d,*nnz_o;
2241     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2242     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2243     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2244     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2245     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2246     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2247     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2248     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2249     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2250     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2251     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2252     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2253     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2254     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2255   }
2256   PetscFunctionReturn(0);
2257 }
2258 
2259 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2260 
2261 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2262 {
2263 #if defined(PETSC_USE_COMPLEX)
2264   PetscErrorCode ierr;
2265   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2266 
2267   PetscFunctionBegin;
2268   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2269   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2270 #else
2271   PetscFunctionBegin;
2272 #endif
2273   PetscFunctionReturn(0);
2274 }
2275 
2276 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2277 {
2278   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2279   PetscErrorCode ierr;
2280 
2281   PetscFunctionBegin;
2282   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2283   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2284   PetscFunctionReturn(0);
2285 }
2286 
2287 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2288 {
2289   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2290   PetscErrorCode ierr;
2291 
2292   PetscFunctionBegin;
2293   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2294   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2295   PetscFunctionReturn(0);
2296 }
2297 
2298 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2299 {
2300   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2301   PetscErrorCode ierr;
2302   PetscInt       i,*idxb = 0;
2303   PetscScalar    *va,*vb;
2304   Vec            vtmp;
2305 
2306   PetscFunctionBegin;
2307   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2308   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2309   if (idx) {
2310     for (i=0; i<A->rmap->n; i++) {
2311       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2312     }
2313   }
2314 
2315   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2316   if (idx) {
2317     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2318   }
2319   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2320   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2321 
2322   for (i=0; i<A->rmap->n; i++) {
2323     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2324       va[i] = vb[i];
2325       if (idx) idx[i] = a->garray[idxb[i]];
2326     }
2327   }
2328 
2329   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2330   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2331   ierr = PetscFree(idxb);CHKERRQ(ierr);
2332   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2333   PetscFunctionReturn(0);
2334 }
2335 
2336 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2337 {
2338   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2339   PetscErrorCode ierr;
2340   PetscInt       i,*idxb = 0;
2341   PetscScalar    *va,*vb;
2342   Vec            vtmp;
2343 
2344   PetscFunctionBegin;
2345   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2346   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2347   if (idx) {
2348     for (i=0; i<A->rmap->n; i++) {
2349       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2350     }
2351   }
2352 
2353   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2354   if (idx) {
2355     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2356   }
2357   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2358   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2359 
2360   for (i=0; i<A->rmap->n; i++) {
2361     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2362       va[i] = vb[i];
2363       if (idx) idx[i] = a->garray[idxb[i]];
2364     }
2365   }
2366 
2367   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2368   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2369   ierr = PetscFree(idxb);CHKERRQ(ierr);
2370   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2371   PetscFunctionReturn(0);
2372 }
2373 
2374 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2375 {
2376   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2377   PetscInt       n      = A->rmap->n;
2378   PetscInt       cstart = A->cmap->rstart;
2379   PetscInt       *cmap  = mat->garray;
2380   PetscInt       *diagIdx, *offdiagIdx;
2381   Vec            diagV, offdiagV;
2382   PetscScalar    *a, *diagA, *offdiagA;
2383   PetscInt       r;
2384   PetscErrorCode ierr;
2385 
2386   PetscFunctionBegin;
2387   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2388   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2389   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2390   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2391   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2392   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2393   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2394   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2395   for (r = 0; r < n; ++r) {
2396     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2397       a[r]   = diagA[r];
2398       idx[r] = cstart + diagIdx[r];
2399     } else {
2400       a[r]   = offdiagA[r];
2401       idx[r] = cmap[offdiagIdx[r]];
2402     }
2403   }
2404   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2405   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2406   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2407   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2408   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2409   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2410   PetscFunctionReturn(0);
2411 }
2412 
2413 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2414 {
2415   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2416   PetscInt       n      = A->rmap->n;
2417   PetscInt       cstart = A->cmap->rstart;
2418   PetscInt       *cmap  = mat->garray;
2419   PetscInt       *diagIdx, *offdiagIdx;
2420   Vec            diagV, offdiagV;
2421   PetscScalar    *a, *diagA, *offdiagA;
2422   PetscInt       r;
2423   PetscErrorCode ierr;
2424 
2425   PetscFunctionBegin;
2426   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2427   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2428   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2429   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2430   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2431   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2432   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2433   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2434   for (r = 0; r < n; ++r) {
2435     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2436       a[r]   = diagA[r];
2437       idx[r] = cstart + diagIdx[r];
2438     } else {
2439       a[r]   = offdiagA[r];
2440       idx[r] = cmap[offdiagIdx[r]];
2441     }
2442   }
2443   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2444   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2445   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2446   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2447   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2448   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2449   PetscFunctionReturn(0);
2450 }
2451 
2452 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2453 {
2454   PetscErrorCode ierr;
2455   Mat            *dummy;
2456 
2457   PetscFunctionBegin;
2458   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2459   *newmat = *dummy;
2460   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2461   PetscFunctionReturn(0);
2462 }
2463 
2464 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2465 {
2466   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2467   PetscErrorCode ierr;
2468 
2469   PetscFunctionBegin;
2470   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2471   A->factorerrortype = a->A->factorerrortype;
2472   PetscFunctionReturn(0);
2473 }
2474 
2475 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2476 {
2477   PetscErrorCode ierr;
2478   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2479 
2480   PetscFunctionBegin;
2481   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2482   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2483   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2484   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2485   PetscFunctionReturn(0);
2486 }
2487 
2488 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2489 {
2490   PetscFunctionBegin;
2491   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2492   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2493   PetscFunctionReturn(0);
2494 }
2495 
2496 /*@
2497    MatMPIAIJSetUseScalableIncreaseOverlap - Determines whether the matrix uses a scalable algorithm to compute the overlap in MatIncreaseOverlap()
2498 
2499    Collective on Mat
2500 
2501    Input Parameters:
2502 +    A - the matrix
2503 -    sc - PETSC_TRUE indicates that the scalable algorithm should be used (by default the scalable algorithm is not used)
2504 
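   Options Database Keys:
.  -mat_increase_overlap_scalable - use a scalable algorithm to compute the overlap

   Sample usage (a minimal sketch; nis, is, and ov are assumed to be set up by the caller):
.vb
   ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
   ierr = MatIncreaseOverlap(A,nis,is,ov);CHKERRQ(ierr);   /* the overlap is now computed with the scalable algorithm */
.ve
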
2505  Level: advanced
2506 
2507 @*/
2508 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2509 {
2510   PetscErrorCode       ierr;
2511 
2512   PetscFunctionBegin;
2513   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2514   PetscFunctionReturn(0);
2515 }
2516 
2517 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2518 {
2519   PetscErrorCode       ierr;
2520   PetscBool            sc = PETSC_FALSE,flg;
2521 
2522   PetscFunctionBegin;
2523   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2524   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2525   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2526   if (flg) {
2527     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2528   }
2529   ierr = PetscOptionsTail();CHKERRQ(ierr);
2530   PetscFunctionReturn(0);
2531 }
2532 
2533 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2534 {
2535   PetscErrorCode ierr;
2536   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2537   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2538 
2539   PetscFunctionBegin;
2540   if (!Y->preallocated) {
2541     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2542   } else if (!aij->nz) {
2543     PetscInt nonew = aij->nonew;
2544     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2545     aij->nonew = nonew;
2546   }
2547   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2548   PetscFunctionReturn(0);
2549 }
2550 
2551 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2552 {
2553   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2554   PetscErrorCode ierr;
2555 
2556   PetscFunctionBegin;
2557   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2558   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2559   if (d) {
2560     PetscInt rstart;
2561     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2562     *d += rstart;
2563 
2564   }
2565   PetscFunctionReturn(0);
2566 }
2567 
2568 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2569 {
2570   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2571   PetscErrorCode ierr;
2572 
2573   PetscFunctionBegin;
2574   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2575   PetscFunctionReturn(0);
2576 }
2577 
2578 /* -------------------------------------------------------------------*/
2579 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2580                                        MatGetRow_MPIAIJ,
2581                                        MatRestoreRow_MPIAIJ,
2582                                        MatMult_MPIAIJ,
2583                                 /* 4*/ MatMultAdd_MPIAIJ,
2584                                        MatMultTranspose_MPIAIJ,
2585                                        MatMultTransposeAdd_MPIAIJ,
2586                                        0,
2587                                        0,
2588                                        0,
2589                                 /*10*/ 0,
2590                                        0,
2591                                        0,
2592                                        MatSOR_MPIAIJ,
2593                                        MatTranspose_MPIAIJ,
2594                                 /*15*/ MatGetInfo_MPIAIJ,
2595                                        MatEqual_MPIAIJ,
2596                                        MatGetDiagonal_MPIAIJ,
2597                                        MatDiagonalScale_MPIAIJ,
2598                                        MatNorm_MPIAIJ,
2599                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2600                                        MatAssemblyEnd_MPIAIJ,
2601                                        MatSetOption_MPIAIJ,
2602                                        MatZeroEntries_MPIAIJ,
2603                                 /*24*/ MatZeroRows_MPIAIJ,
2604                                        0,
2605                                        0,
2606                                        0,
2607                                        0,
2608                                 /*29*/ MatSetUp_MPIAIJ,
2609                                        0,
2610                                        0,
2611                                        MatGetDiagonalBlock_MPIAIJ,
2612                                        0,
2613                                 /*34*/ MatDuplicate_MPIAIJ,
2614                                        0,
2615                                        0,
2616                                        0,
2617                                        0,
2618                                 /*39*/ MatAXPY_MPIAIJ,
2619                                        MatCreateSubMatrices_MPIAIJ,
2620                                        MatIncreaseOverlap_MPIAIJ,
2621                                        MatGetValues_MPIAIJ,
2622                                        MatCopy_MPIAIJ,
2623                                 /*44*/ MatGetRowMax_MPIAIJ,
2624                                        MatScale_MPIAIJ,
2625                                        MatShift_MPIAIJ,
2626                                        MatDiagonalSet_MPIAIJ,
2627                                        MatZeroRowsColumns_MPIAIJ,
2628                                 /*49*/ MatSetRandom_MPIAIJ,
2629                                        0,
2630                                        0,
2631                                        0,
2632                                        0,
2633                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2634                                        0,
2635                                        MatSetUnfactored_MPIAIJ,
2636                                        MatPermute_MPIAIJ,
2637                                        0,
2638                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2639                                        MatDestroy_MPIAIJ,
2640                                        MatView_MPIAIJ,
2641                                        0,
2642                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2643                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2644                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2645                                        0,
2646                                        0,
2647                                        0,
2648                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2649                                        MatGetRowMinAbs_MPIAIJ,
2650                                        0,
2651                                        0,
2652                                        0,
2653                                        0,
2654                                 /*75*/ MatFDColoringApply_AIJ,
2655                                        MatSetFromOptions_MPIAIJ,
2656                                        0,
2657                                        0,
2658                                        MatFindZeroDiagonals_MPIAIJ,
2659                                 /*80*/ 0,
2660                                        0,
2661                                        0,
2662                                 /*83*/ MatLoad_MPIAIJ,
2663                                        MatIsSymmetric_MPIAIJ,
2664                                        0,
2665                                        0,
2666                                        0,
2667                                        0,
2668                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2669                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2670                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2671                                        MatPtAP_MPIAIJ_MPIAIJ,
2672                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2673                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2674                                        0,
2675                                        0,
2676                                        0,
2677                                        0,
2678                                 /*99*/ 0,
2679                                        0,
2680                                        0,
2681                                        MatConjugate_MPIAIJ,
2682                                        0,
2683                                 /*104*/MatSetValuesRow_MPIAIJ,
2684                                        MatRealPart_MPIAIJ,
2685                                        MatImaginaryPart_MPIAIJ,
2686                                        0,
2687                                        0,
2688                                 /*109*/0,
2689                                        0,
2690                                        MatGetRowMin_MPIAIJ,
2691                                        0,
2692                                        MatMissingDiagonal_MPIAIJ,
2693                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2694                                        0,
2695                                        MatGetGhosts_MPIAIJ,
2696                                        0,
2697                                        0,
2698                                 /*119*/0,
2699                                        0,
2700                                        0,
2701                                        0,
2702                                        MatGetMultiProcBlock_MPIAIJ,
2703                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2704                                        MatGetColumnNorms_MPIAIJ,
2705                                        MatInvertBlockDiagonal_MPIAIJ,
2706                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2707                                        MatCreateSubMatricesMPI_MPIAIJ,
2708                                 /*129*/0,
2709                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2710                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2711                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2712                                        0,
2713                                 /*134*/0,
2714                                        0,
2715                                        MatRARt_MPIAIJ_MPIAIJ,
2716                                        0,
2717                                        0,
2718                                 /*139*/MatSetBlockSizes_MPIAIJ,
2719                                        0,
2720                                        0,
2721                                        MatFDColoringSetUp_MPIXAIJ,
2722                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2723                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2724 };
2725 
2726 /* ----------------------------------------------------------------------------------------*/
2727 
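/*
   A sketch of the store/retrieve pattern these two helpers support for the parallel case; the
   MatSetOption() call reflects the documented requirement of MatStoreValues() that the nonzero
   structure not change, and the surrounding statements are illustrative only:

     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(A);CHKERRQ(ierr);      <-- snapshot the current numerical values
     ... change values with MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd() ...
     ierr = MatRetrieveValues(A);CHKERRQ(ierr);   <-- restore the stored values
*/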
2728 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2729 {
2730   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2731   PetscErrorCode ierr;
2732 
2733   PetscFunctionBegin;
2734   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2735   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2736   PetscFunctionReturn(0);
2737 }
2738 
2739 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2740 {
2741   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2742   PetscErrorCode ierr;
2743 
2744   PetscFunctionBegin;
2745   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2746   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2747   PetscFunctionReturn(0);
2748 }
2749 
2750 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2751 {
2752   Mat_MPIAIJ     *b;
2753   PetscErrorCode ierr;
2754 
2755   PetscFunctionBegin;
2756   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2757   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2758   b = (Mat_MPIAIJ*)B->data;
2759 
2760 #if defined(PETSC_USE_CTABLE)
2761   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2762 #else
2763   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2764 #endif
2765   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2766   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2767   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2768 
2769   /* Because B will have been resized, we simply destroy it and create a new one each time */
2770   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2771   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2772   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2773   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2774   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2775   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2776 
2777   if (!B->preallocated) {
2778     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2779     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2780     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2781     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2782     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2783   }
2784 
2785   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2786   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2787   B->preallocated  = PETSC_TRUE;
2788   B->was_assembled = PETSC_FALSE;
2789   B->assembled     = PETSC_FALSE;
2790   PetscFunctionReturn(0);
2791 }
2792 
2793 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2794 {
2795   Mat_MPIAIJ     *b;
2796   PetscErrorCode ierr;
2797 
2798   PetscFunctionBegin;
2799   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2800   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2801   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2802   b = (Mat_MPIAIJ*)B->data;
2803 
2804 #if defined(PETSC_USE_CTABLE)
2805   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2806 #else
2807   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2808 #endif
2809   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2810   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2811   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2812 
2813   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2814   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2815   B->preallocated  = PETSC_TRUE;
2816   B->was_assembled = PETSC_FALSE;
2817   B->assembled = PETSC_FALSE;
2818   PetscFunctionReturn(0);
2819 }
2820 
2821 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2822 {
2823   Mat            mat;
2824   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2825   PetscErrorCode ierr;
2826 
2827   PetscFunctionBegin;
2828   *newmat = 0;
2829   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2830   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2831   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2832   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2833   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2834   a       = (Mat_MPIAIJ*)mat->data;
2835 
2836   mat->ops->destroy = MatDestroy_MPIAIJ;
2837   mat->factortype   = matin->factortype;
2838   mat->assembled    = PETSC_TRUE;
2839   mat->insertmode   = NOT_SET_VALUES;
2840   mat->preallocated = PETSC_TRUE;
2841 
2842   a->size         = oldmat->size;
2843   a->rank         = oldmat->rank;
2844   a->donotstash   = oldmat->donotstash;
2845   a->roworiented  = oldmat->roworiented;
2846   a->rowindices   = 0;
2847   a->rowvalues    = 0;
2848   a->getrowactive = PETSC_FALSE;
2849 
2850   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2851   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2852 
2853   if (oldmat->colmap) {
2854 #if defined(PETSC_USE_CTABLE)
2855     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2856 #else
2857     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2858     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2859     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2860 #endif
2861   } else a->colmap = 0;
2862   if (oldmat->garray) {
2863     PetscInt len;
2864     len  = oldmat->B->cmap->n;
2865     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2866     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2867     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2868   } else a->garray = 0;
2869 
2870   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2871   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2872   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2873   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2874 
2875   if (oldmat->Mvctx_mpi1) {
2876     ierr    = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2877     ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2878   }
2879 
2880   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2881   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2882   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2883   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2884   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2885   *newmat = mat;
2886   PetscFunctionReturn(0);
2887 }
2888 
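/*
   A minimal usage sketch of the public MatLoad() path that dispatches to this routine; the file name
   "matrix.dat" is illustrative only:

     PetscViewer viewer;
     Mat         A;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/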
2889 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2890 {
2891   PetscBool      isbinary, ishdf5;
2892   PetscErrorCode ierr;
2893 
2894   PetscFunctionBegin;
2895   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2896   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2897   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2898   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2899   if (isbinary) {
2900     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
2901   } else if (ishdf5) {
2902 #if defined(PETSC_HAVE_HDF5)
2903     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
2904 #else
2905     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2906 #endif
2907   } else {
2908     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2909   }
2910   PetscFunctionReturn(0);
2911 }
2912 
2913 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer)
2914 {
2915   PetscScalar    *vals,*svals;
2916   MPI_Comm       comm;
2917   PetscErrorCode ierr;
2918   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2919   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2920   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2921   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2922   PetscInt       cend,cstart,n,*rowners;
2923   int            fd;
2924   PetscInt       bs = newMat->rmap->bs;
2925 
2926   PetscFunctionBegin;
2927   /* force binary viewer to load .info file if it has not yet done so */
2928   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2929   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2930   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2931   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2932   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2933   if (!rank) {
2934     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2935     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object");
2936     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2937   }
2938 
2939   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2940   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2941   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2942   if (bs < 0) bs = 1;
2943 
2944   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2945   M    = header[1]; N = header[2];
2946 
2947   /* If global sizes are set, check if they are consistent with that given in the file */
2948   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2949   if (newMat->cmap->N >= 0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2950 
2951   /* determine ownership of all (block) rows */
2952   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows (%D) and block size (%D)",M,bs);
2953   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2954   else m = newMat->rmap->n; /* Set by user */
2955 
2956   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2957   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2958 
2959   /* First process needs enough room for process with most rows */
2960   if (!rank) {
2961     mmax = rowners[1];
2962     for (i=2; i<=size; i++) {
2963       mmax = PetscMax(mmax, rowners[i]);
2964     }
2965   } else mmax = -1;             /* unused, but compilers complain */
2966 
2967   rowners[0] = 0;
2968   for (i=2; i<=size; i++) {
2969     rowners[i] += rowners[i-1];
2970   }
2971   rstart = rowners[rank];
2972   rend   = rowners[rank+1];
2973 
2974   /* distribute row lengths to all processors */
2975   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2976   if (!rank) {
2977     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2978     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2979     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2980     for (j=0; j<m; j++) {
2981       procsnz[0] += ourlens[j];
2982     }
2983     for (i=1; i<size; i++) {
2984       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2985       /* calculate the number of nonzeros on each processor */
2986       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2987         procsnz[i] += rowlengths[j];
2988       }
2989       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2990     }
2991     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2992   } else {
2993     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2994   }
2995 
2996   if (!rank) {
2997     /* determine max buffer needed and allocate it */
2998     maxnz = 0;
2999     for (i=0; i<size; i++) {
3000       maxnz = PetscMax(maxnz,procsnz[i]);
3001     }
3002     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3003 
3004     /* read in my part of the matrix column indices  */
3005     nz   = procsnz[0];
3006     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3007     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3008 
3009     /* read in everyone else's and ship them off */
3010     for (i=1; i<size; i++) {
3011       nz   = procsnz[i];
3012       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3013       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3014     }
3015     ierr = PetscFree(cols);CHKERRQ(ierr);
3016   } else {
3017     /* determine buffer space needed for message */
3018     nz = 0;
3019     for (i=0; i<m; i++) {
3020       nz += ourlens[i];
3021     }
3022     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3023 
3024     /* receive message of column indices */
3025     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3026   }
3027 
3028   /* determine column ownership if matrix is not square */
3029   if (N != M) {
3030     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3031     else n = newMat->cmap->n;
3032     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3033     cstart = cend - n;
3034   } else {
3035     cstart = rstart;
3036     cend   = rend;
3037     n      = cend - cstart;
3038   }
3039 
3040   /* loop over local rows, determining number of off diagonal entries */
3041   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3042   jj   = 0;
3043   for (i=0; i<m; i++) {
3044     for (j=0; j<ourlens[i]; j++) {
3045       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3046       jj++;
3047     }
3048   }
3049 
3050   for (i=0; i<m; i++) {
3051     ourlens[i] -= offlens[i];
3052   }
3053   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3054 
3055   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3056 
3057   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3058 
3059   for (i=0; i<m; i++) {
3060     ourlens[i] += offlens[i];
3061   }
3062 
3063   if (!rank) {
3064     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3065 
3066     /* read in my part of the matrix numerical values  */
3067     nz   = procsnz[0];
3068     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3069 
3070     /* insert into matrix */
3071     jj      = rstart;
3072     smycols = mycols;
3073     svals   = vals;
3074     for (i=0; i<m; i++) {
3075       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3076       smycols += ourlens[i];
3077       svals   += ourlens[i];
3078       jj++;
3079     }
3080 
3081     /* read in other processors and ship out */
3082     for (i=1; i<size; i++) {
3083       nz   = procsnz[i];
3084       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3085       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3086     }
3087     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3088   } else {
3089     /* receive numeric values */
3090     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3091 
3092     /* receive message of values */
3093     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3094 
3095     /* insert into matrix */
3096     jj      = rstart;
3097     smycols = mycols;
3098     svals   = vals;
3099     for (i=0; i<m; i++) {
3100       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3101       smycols += ourlens[i];
3102       svals   += ourlens[i];
3103       jj++;
3104     }
3105   }
3106   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3107   ierr = PetscFree(vals);CHKERRQ(ierr);
3108   ierr = PetscFree(mycols);CHKERRQ(ierr);
3109   ierr = PetscFree(rowners);CHKERRQ(ierr);
3110   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3111   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3112   PetscFunctionReturn(0);
3113 }
3114 
3115 /* Not scalable because of ISAllGather() unless getting all columns. */
3116 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3117 {
3118   PetscErrorCode ierr;
3119   IS             iscol_local;
3120   PetscBool      isstride;
3121   PetscMPIInt    lisstride=0,gisstride;
3122 
3123   PetscFunctionBegin;
3124   /* check if we are grabbing all columns */
3125   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3126 
3127   if (isstride) {
3128     PetscInt  start,len,mstart,mlen;
3129     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3130     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3131     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3132     if (mstart == start && mlen-mstart == len) lisstride = 1;
3133   }
3134 
3135   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3136   if (gisstride) {
3137     PetscInt N;
3138     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3139     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3140     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3141     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3142   } else {
3143     PetscInt cbs;
3144     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3145     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3146     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3147   }
3148 
3149   *isseq = iscol_local;
3150   PetscFunctionReturn(0);
3151 }
3152 
3153 /*
3154  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3155  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3156 
3157  Input Parameters:
3158    mat - matrix
3159    isrow - parallel row index set; its local indices are a subset of local rows of mat,
3160            i.e., mat->rstart <= isrow[i] < mat->rend
3161    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3162            i.e., mat->cstart <= iscol[i] < mat->cend
3163  Output Parameter:
3164    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3165    iscol_o - sequential column index set for retrieving mat->B
3166    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3167  */
3168 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3169 {
3170   PetscErrorCode ierr;
3171   Vec            x,cmap;
3172   const PetscInt *is_idx;
3173   PetscScalar    *xarray,*cmaparray;
3174   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3175   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3176   Mat            B=a->B;
3177   Vec            lvec=a->lvec,lcmap;
3178   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3179   MPI_Comm       comm;
3180   VecScatter     Mvctx=a->Mvctx;
3181 
3182   PetscFunctionBegin;
3183   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3184   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3185 
3186   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3187   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3188   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3189   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3190   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3191 
3192   /* Get start indices */
3193   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3194   isstart -= ncols;
3195   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3196 
3197   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3198   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3199   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3200   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3201   for (i=0; i<ncols; i++) {
3202     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3203     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3204     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3205   }
3206   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3207   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3208   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3209 
3210   /* Get iscol_d */
3211   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3212   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3213   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3214 
3215   /* Get isrow_d */
3216   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3217   rstart = mat->rmap->rstart;
3218   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3219   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3220   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3221   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3222 
3223   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3224   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3225   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3226 
3227   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3228   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3229   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3230 
3231   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3232 
3233   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3234   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3235 
3236   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3237   /* off-process column indices */
3238   count = 0;
3239   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3240   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3241 
3242   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3243   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3244   for (i=0; i<Bn; i++) {
3245     if (PetscRealPart(xarray[i]) > -1.0) {
3246       idx[count]     = i;                   /* local column index in off-diagonal part B */
3247       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3248       count++;
3249     }
3250   }
3251   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3252   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3253 
3254   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3255   /* cannot ensure iscol_o has same blocksize as iscol! */
3256 
3257   ierr = PetscFree(idx);CHKERRQ(ierr);
3258   *garray = cmap1;
3259 
3260   ierr = VecDestroy(&x);CHKERRQ(ierr);
3261   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3262   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3263   PetscFunctionReturn(0);
3264 }
3265 
3266 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3267 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3268 {
3269   PetscErrorCode ierr;
3270   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3271   Mat            M = NULL;
3272   MPI_Comm       comm;
3273   IS             iscol_d,isrow_d,iscol_o;
3274   Mat            Asub = NULL,Bsub = NULL;
3275   PetscInt       n;
3276 
3277   PetscFunctionBegin;
3278   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3279 
3280   if (call == MAT_REUSE_MATRIX) {
3281     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3282     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3283     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3284 
3285     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3286     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3287 
3288     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3289     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3290 
3291     /* Update diagonal and off-diagonal portions of submat */
3292     asub = (Mat_MPIAIJ*)(*submat)->data;
3293     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3294     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3295     if (n) {
3296       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3297     }
3298     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3299     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3300 
3301   } else { /* call == MAT_INITIAL_MATRIX */
3302     const PetscInt *garray;
3303     PetscInt        BsubN;
3304 
3305     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3306     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3307 
3308     /* Create local submatrices Asub and Bsub */
3309     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3310     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3311 
3312     /* Create submatrix M */
3313     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3314 
3315     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3316     asub = (Mat_MPIAIJ*)M->data;
3317 
3318     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3319     n = asub->B->cmap->N;
3320     if (BsubN > n) {
3321       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3322       const PetscInt *idx;
3323       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3324       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3325 
3326       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3327       j = 0;
3328       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3329       for (i=0; i<n; i++) {
3330         if (j >= BsubN) break;
3331         while (subgarray[i] > garray[j]) j++;
3332 
3333         if (subgarray[i] == garray[j]) {
3334           idx_new[i] = idx[j++];
3335         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3336       }
3337       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3338 
3339       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3340       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3341 
3342     } else if (BsubN < n) {
3343       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be smaller than B's (%D)",BsubN,asub->B->cmap->N);
3344     }
3345 
3346     ierr = PetscFree(garray);CHKERRQ(ierr);
3347     *submat = M;
3348 
3349     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3350     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3351     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3352 
3353     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3354     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3355 
3356     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3357     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3358   }
3359   PetscFunctionReturn(0);
3360 }
3361 
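/*
   A sketch of how this routine is typically reached through the public interface; the index sets below
   (the full local row range used for both isrow and iscol) are illustrative only:

     IS       isrow,iscol;
     Mat      sub;
     PetscInt rstart,rend;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&iscol);CHKERRQ(ierr);
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
*/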
3362 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3363 {
3364   PetscErrorCode ierr;
3365   IS             iscol_local=NULL,isrow_d;
3366   PetscInt       csize;
3367   PetscInt       n,i,j,start,end;
3368   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3369   MPI_Comm       comm;
3370 
3371   PetscFunctionBegin;
3372   /* If isrow has same processor distribution as mat,
3373      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3374   if (call == MAT_REUSE_MATRIX) {
3375     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3376     if (isrow_d) {
3377       sameRowDist  = PETSC_TRUE;
3378       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3379     } else {
3380       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3381       if (iscol_local) {
3382         sameRowDist  = PETSC_TRUE;
3383         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3384       }
3385     }
3386   } else {
3387     /* Check if isrow has same processor distribution as mat */
3388     sameDist[0] = PETSC_FALSE;
3389     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3390     if (!n) {
3391       sameDist[0] = PETSC_TRUE;
3392     } else {
3393       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3394       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3395       if (i >= start && j < end) {
3396         sameDist[0] = PETSC_TRUE;
3397       }
3398     }
3399 
3400     /* Check if iscol has same processor distribution as mat */
3401     sameDist[1] = PETSC_FALSE;
3402     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3403     if (!n) {
3404       sameDist[1] = PETSC_TRUE;
3405     } else {
3406       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3407       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3408       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3409     }
3410 
3411     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3412     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3413     sameRowDist = tsameDist[0];
3414   }
3415 
3416   if (sameRowDist) {
3417     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3418       /* isrow and iscol have same processor distribution as mat */
3419       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3420       PetscFunctionReturn(0);
3421     } else { /* sameRowDist */
3422       /* isrow has same processor distribution as mat */
3423       if (call == MAT_INITIAL_MATRIX) {
3424         PetscBool sorted;
3425         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3426         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3427         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3428         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3429 
3430         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3431         if (sorted) {
3432           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3433           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3434           PetscFunctionReturn(0);
3435         }
3436       } else { /* call == MAT_REUSE_MATRIX */
3437         IS    iscol_sub;
3438         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3439         if (iscol_sub) {
3440           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3441           PetscFunctionReturn(0);
3442         }
3443       }
3444     }
3445   }
3446 
3447   /* General case: iscol -> iscol_local which has global size of iscol */
3448   if (call == MAT_REUSE_MATRIX) {
3449     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3450     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3451   } else {
3452     if (!iscol_local) {
3453       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3454     }
3455   }
3456 
3457   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3458   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3459 
3460   if (call == MAT_INITIAL_MATRIX) {
3461     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3462     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3463   }
3464   PetscFunctionReturn(0);
3465 }
3466 
3467 /*@C
3468      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3469          and "off-diagonal" part of the matrix in CSR format.
3470 
3471    Collective on MPI_Comm
3472 
3473    Input Parameters:
3474 +  comm - MPI communicator
3475 .  A - "diagonal" portion of matrix
3476 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3477 -  garray - global index of B columns
3478 
3479    Output Parameter:
3480 .   mat - the matrix, with input A as its local diagonal matrix
3481    Level: advanced
3482 
3483    Notes:
3484        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3485        A becomes part of the output mat and B is destroyed by this routine. The user cannot use A or B afterwards.
3486 
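   Sample usage (a sketch; A, B, and garray are assumed to have been built already, with B's column
   indices local to B and garray holding their global column numbers):
.vb
     Mat C;
     ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,A,B,garray,&C);CHKERRQ(ierr);
     /* A and B now belong to C; the caller must not use or destroy them */
.ve
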
3487 .seealso: MatCreateMPIAIJWithSplitArrays()
3488 @*/
3489 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3490 {
3491   PetscErrorCode ierr;
3492   Mat_MPIAIJ     *maij;
3493   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3494   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3495   PetscScalar    *oa=b->a;
3496   Mat            Bnew;
3497   PetscInt       m,n,N;
3498 
3499   PetscFunctionBegin;
3500   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3501   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3502   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3503   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3504   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3505   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3506 
3507   /* Get global columns of mat */
3508   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3509 
3510   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3511   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3512   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3513   maij = (Mat_MPIAIJ*)(*mat)->data;
3514 
3515   (*mat)->preallocated = PETSC_TRUE;
3516 
3517   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3518   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3519 
3520   /* Set A as diagonal portion of *mat */
3521   maij->A = A;
3522 
3523   nz = oi[m];
3524   for (i=0; i<nz; i++) {
3525     col   = oj[i];
3526     oj[i] = garray[col];
3527   }
3528 
3529    /* Set Bnew as off-diagonal portion of *mat */
3530   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3531   bnew        = (Mat_SeqAIJ*)Bnew->data;
3532   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3533   maij->B     = Bnew;
3534 
3535   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3536 
3537   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3538   b->free_a       = PETSC_FALSE;
3539   b->free_ij      = PETSC_FALSE;
3540   ierr = MatDestroy(&B);CHKERRQ(ierr);
3541 
3542   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3543   bnew->free_a       = PETSC_TRUE;
3544   bnew->free_ij      = PETSC_TRUE;
3545 
3546   /* condense columns of maij->B */
3547   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3548   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3549   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3550   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3551   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3552   PetscFunctionReturn(0);
3553 }
3554 
3555 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3556 
3557 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3558 {
3559   PetscErrorCode ierr;
3560   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3561   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3562   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3563   Mat            M,Msub,B=a->B;
3564   MatScalar      *aa;
3565   Mat_SeqAIJ     *aij;
3566   PetscInt       *garray = a->garray,*colsub,Ncols;
3567   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3568   IS             iscol_sub,iscmap;
3569   const PetscInt *is_idx,*cmap;
3570   PetscBool      allcolumns=PETSC_FALSE;
3571   MPI_Comm       comm;
3572 
3573   PetscFunctionBegin;
3574   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3575 
3576   if (call == MAT_REUSE_MATRIX) {
3577     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3578     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3579     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3580 
3581     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3582     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3583 
3584     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3585     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3586 
3587     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3588 
3589   } else { /* call == MAT_INITIAL_MATRIX */
3590     PetscBool flg;
3591 
3592     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3593     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3594 
3595     /* (1) iscol -> nonscalable iscol_local */
3596     /* Check for special case: each processor gets entire matrix columns */
3597     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3598     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3599     if (allcolumns) {
3600       iscol_sub = iscol_local;
3601       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3602       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3603 
3604     } else {
3605       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3606       PetscInt *idx,*cmap1,k;
3607       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3608       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3609       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3610       count = 0;
3611       k     = 0;
3612       for (i=0; i<Ncols; i++) {
3613         j = is_idx[i];
3614         if (j >= cstart && j < cend) {
3615           /* diagonal part of mat */
3616           idx[count]     = j;
3617           cmap1[count++] = i; /* column index in submat */
3618         } else if (Bn) {
3619           /* off-diagonal part of mat */
3620           if (j == garray[k]) {
3621             idx[count]     = j;
3622             cmap1[count++] = i;  /* column index in submat */
3623           } else if (j > garray[k]) {
3624             while (j > garray[k] && k < Bn-1) k++;
3625             if (j == garray[k]) {
3626               idx[count]     = j;
3627               cmap1[count++] = i; /* column index in submat */
3628             }
3629           }
3630         }
3631       }
3632       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3633 
3634       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3635       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3636       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3637 
3638       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3639     }
3640 
3641     /* (3) Create sequential Msub */
3642     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3643   }
3644 
3645   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3646   aij  = (Mat_SeqAIJ*)(Msub)->data;
3647   ii   = aij->i;
3648   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3649 
3650   /*
3651       m - number of local rows
3652       Ncols - number of columns (same on all processors)
3653       rstart - first row in new global matrix generated
3654   */
3655   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3656 
3657   if (call == MAT_INITIAL_MATRIX) {
3658     /* (4) Create parallel newmat */
3659     PetscMPIInt    rank,size;
3660     PetscInt       csize;
3661 
3662     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3663     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3664 
3665     /*
3666         Determine the number of non-zeros in the diagonal and off-diagonal
3667         portions of the matrix in order to do correct preallocation
3668     */
3669 
3670     /* first get start and end of "diagonal" columns */
3671     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3672     if (csize == PETSC_DECIDE) {
3673       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3674       if (mglobal == Ncols) { /* square matrix */
3675         nlocal = m;
3676       } else {
3677         nlocal = Ncols/size + ((Ncols % size) > rank);
3678       }
3679     } else {
3680       nlocal = csize;
3681     }
3682     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3683     rstart = rend - nlocal;
3684     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3685 
3686     /* next, compute all the lengths */
3687     jj    = aij->j;
3688     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3689     olens = dlens + m;
3690     for (i=0; i<m; i++) {
3691       jend = ii[i+1] - ii[i];
3692       olen = 0;
3693       dlen = 0;
3694       for (j=0; j<jend; j++) {
3695         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3696         else dlen++;
3697         jj++;
3698       }
3699       olens[i] = olen;
3700       dlens[i] = dlen;
3701     }
3702 
3703     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3704     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3705 
3706     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3707     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3708     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3709     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3710     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3711     ierr = PetscFree(dlens);CHKERRQ(ierr);
3712 
3713   } else { /* call == MAT_REUSE_MATRIX */
3714     M    = *newmat;
3715     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3716     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3717     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3718     /*
3719          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3720        rather than the slower MatSetValues().
3721     */
3722     M->was_assembled = PETSC_TRUE;
3723     M->assembled     = PETSC_FALSE;
3724   }
3725 
3726   /* (5) Set values of Msub to *newmat */
3727   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3728   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3729 
3730   jj   = aij->j;
3731   aa   = aij->a;
3732   for (i=0; i<m; i++) {
3733     row = rstart + i;
3734     nz  = ii[i+1] - ii[i];
3735     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3736     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3737     jj += nz; aa += nz;
3738   }
3739   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3740 
3741   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3742   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3743 
3744   ierr = PetscFree(colsub);CHKERRQ(ierr);
3745 
3746   /* save Msub, iscol_sub and iscmap used in processor for next request */
3747   if (call ==  MAT_INITIAL_MATRIX) {
3748     *newmat = M;
3749     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3750     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3751 
3752     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3753     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3754 
3755     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3756     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3757 
3758     if (iscol_local) {
3759       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3760       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3761     }
3762   }
3763   PetscFunctionReturn(0);
3764 }
3765 
3766 /*
3767     Not great since it makes two copies of the submatrix: first a SeqAIJ on each process,
3768   and then the final result by concatenating the local matrices.
3769   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3770 
3771   Note: This requires a sequential iscol with all indices.
3772 */
3773 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3774 {
3775   PetscErrorCode ierr;
3776   PetscMPIInt    rank,size;
3777   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3778   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3779   Mat            M,Mreuse;
3780   MatScalar      *aa,*vwork;
3781   MPI_Comm       comm;
3782   Mat_SeqAIJ     *aij;
3783   PetscBool      colflag,allcolumns=PETSC_FALSE;
3784 
3785   PetscFunctionBegin;
3786   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3787   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3788   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3789 
3790   /* Check for special case: each processor gets entire matrix columns */
3791   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3792   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3793   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3794 
3795   if (call ==  MAT_REUSE_MATRIX) {
3796     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3797     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3798     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3799   } else {
3800     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3801   }
3802 
3803   /*
3804       m - number of local rows
3805       n - number of columns (same on all processors)
3806       rstart - first row in new global matrix generated
3807   */
3808   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3809   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3810   if (call == MAT_INITIAL_MATRIX) {
3811     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3812     ii  = aij->i;
3813     jj  = aij->j;
3814 
3815     /*
3816         Determine the number of non-zeros in the diagonal and off-diagonal
3817         portions of the matrix in order to do correct preallocation
3818     */
3819 
3820     /* first get start and end of "diagonal" columns */
3821     if (csize == PETSC_DECIDE) {
3822       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3823       if (mglobal == n) { /* square matrix */
3824         nlocal = m;
3825       } else {
3826         nlocal = n/size + ((n % size) > rank);
3827       }
3828     } else {
3829       nlocal = csize;
3830     }
3831     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3832     rstart = rend - nlocal;
3833     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3834 
3835     /* next, compute all the lengths */
3836     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3837     olens = dlens + m;
3838     for (i=0; i<m; i++) {
3839       jend = ii[i+1] - ii[i];
3840       olen = 0;
3841       dlen = 0;
3842       for (j=0; j<jend; j++) {
3843         if (*jj < rstart || *jj >= rend) olen++;
3844         else dlen++;
3845         jj++;
3846       }
3847       olens[i] = olen;
3848       dlens[i] = dlen;
3849     }
3850     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3851     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3852     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3853     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3854     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3855     ierr = PetscFree(dlens);CHKERRQ(ierr);
3856   } else {
3857     PetscInt ml,nl;
3858 
3859     M    = *newmat;
3860     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3861     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3862     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3863     /*
3864          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3865        rather than the slower MatSetValues().
3866     */
3867     M->was_assembled = PETSC_TRUE;
3868     M->assembled     = PETSC_FALSE;
3869   }
3870   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3871   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3872   ii   = aij->i;
3873   jj   = aij->j;
3874   aa   = aij->a;
3875   for (i=0; i<m; i++) {
3876     row   = rstart + i;
3877     nz    = ii[i+1] - ii[i];
3878     cwork = jj;     jj += nz;
3879     vwork = aa;     aa += nz;
3880     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3881   }
3882 
3883   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3884   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3885   *newmat = M;
3886 
3887   /* save submatrix used in processor for next request */
3888   if (call ==  MAT_INITIAL_MATRIX) {
3889     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3890     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3891   }
3892   PetscFunctionReturn(0);
3893 }
3894 
3895 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3896 {
3897   PetscInt       m,cstart, cend,j,nnz,i,d;
3898   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3899   const PetscInt *JJ;
3900   PetscScalar    *values;
3901   PetscErrorCode ierr;
3902   PetscBool      nooffprocentries;
3903 
3904   PetscFunctionBegin;
3905   if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3906 
3907   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3908   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3909   m      = B->rmap->n;
3910   cstart = B->cmap->rstart;
3911   cend   = B->cmap->rend;
3912   rstart = B->rmap->rstart;
3913 
3914   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3915 
3916 #if defined(PETSC_USE_DEBUG)
3917   for (i=0; i<m && Ii; i++) {
3918     nnz = Ii[i+1]- Ii[i];
3919     JJ  = J + Ii[i];
3920     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3921     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3922     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3923   }
3924 #endif
3925 
3926   for (i=0; i<m && Ii; i++) {
3927     nnz     = Ii[i+1]- Ii[i];
3928     JJ      = J + Ii[i];
3929     nnz_max = PetscMax(nnz_max,nnz);
3930     d       = 0;
3931     for (j=0; j<nnz; j++) {
3932       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3933     }
3934     d_nnz[i] = d;
3935     o_nnz[i] = nnz - d;
3936   }
3937   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3938   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3939 
3940   if (v) values = (PetscScalar*)v;
3941   else {
3942     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3943   }
3944 
3945   for (i=0; i<m && Ii; i++) {
3946     ii   = i + rstart;
3947     nnz  = Ii[i+1]- Ii[i];
3948     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3949   }
3950   nooffprocentries    = B->nooffprocentries;
3951   B->nooffprocentries = PETSC_TRUE;
3952   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3953   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3954   B->nooffprocentries = nooffprocentries;
3955 
3956   if (!v) {
3957     ierr = PetscFree(values);CHKERRQ(ierr);
3958   }
3959   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3960   PetscFunctionReturn(0);
3961 }
3962 
3963 /*@
3964    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3965    (the default parallel PETSc format).
3966 
3967    Collective on MPI_Comm
3968 
3969    Input Parameters:
3970 +  B - the matrix
3971 .  i - the indices into j for the start of each local row (starts with zero)
3972 .  j - the column indices for each local row (starts with zero)
3973 -  v - optional values in the matrix
3974 
3975    Level: developer
3976 
3977    Notes:
3978        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3979      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3980      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3981 
3982        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3983 
3984        The format used for the sparse matrix input is equivalent to a
3985     row-major ordering, i.e. for the following matrix, the input data expected is
3986     as shown
3987 
3988 $        1 0 0
3989 $        2 0 3     P0
3990 $       -------
3991 $        4 5 6     P1
3992 $
3993 $     Process0 [P0]: rows_owned=[0,1]
3994 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3995 $        j =  {0,0,2}  [size = 3]
3996 $        v =  {1,2,3}  [size = 3]
3997 $
3998 $     Process1 [P1]: rows_owned=[2]
3999 $        i =  {0,3}    [size = nrow+1  = 1+1]
4000 $        j =  {0,1,2}  [size = 3]
4001 $        v =  {4,5,6}  [size = 3]
4002 
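       As a concrete sketch, the call made by process 0 for the example above would be (it is
    assumed that B already has its local sizes set with MatSetSizes() and its type set to MATMPIAIJ):

$     PetscInt    i[] = {0,1,3};        /* row offsets into j for the 2 locally owned rows */
$     PetscInt    j[] = {0,0,2};        /* global column indices                           */
$     PetscScalar v[] = {1.0,2.0,3.0};  /* matrix values                                   */
$     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
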
4003 .keywords: matrix, aij, compressed row, sparse, parallel
4004 
4005 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
4006           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
4007 @*/
4008 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
4009 {
4010   PetscErrorCode ierr;
4011 
4012   PetscFunctionBegin;
4013   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
4014   PetscFunctionReturn(0);
4015 }
4016 
4017 /*@C
4018    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4019    (the default parallel PETSc format).  For good matrix assembly performance
4020    the user should preallocate the matrix storage by setting the parameters
4021    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4022    performance can be increased by more than a factor of 50.
4023 
4024    Collective on MPI_Comm
4025 
4026    Input Parameters:
4027 +  B - the matrix
4028 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4029            (same value is used for all local rows)
4030 .  d_nnz - array containing the number of nonzeros in the various rows of the
4031            DIAGONAL portion of the local submatrix (possibly different for each row)
4032            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4033            The size of this array is equal to the number of local rows, i.e 'm'.
4034            For matrices that will be factored, you must leave room for (and set)
4035            the diagonal entry even if it is zero.
4036 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4037            submatrix (same value is used for all local rows).
4038 -  o_nnz - array containing the number of nonzeros in the various rows of the
4039            OFF-DIAGONAL portion of the local submatrix (possibly different for
4040            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4041            structure. The size of this array is equal to the number
4042            of local rows, i.e 'm'.
4043 
4044    If the *_nnz parameter is given then the *_nz parameter is ignored
4045 
4046    The AIJ format (also called the Yale sparse matrix format or
4047    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4048    storage.  The stored row and column indices begin with zero.
4049    See Users-Manual: ch_mat for details.
4050 
4051    The parallel matrix is partitioned such that the first m0 rows belong to
4052    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4053    to process 2 etc., where m0,m1,m2... are the input parameter 'm'.
4054 
4055    The DIAGONAL portion of the local submatrix of a processor can be defined
4056    as the submatrix which is obtained by extracting the part corresponding to
4057    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4058    first row that belongs to the processor, r2 is the last row belonging to
4059    this processor, and c1-c2 is the range of indices of the local part of a
4060    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4061    common case of a square matrix, the row and column ranges are the same and
4062    the DIAGONAL part is also square. The remaining portion of the local
4063    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4064 
4065    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4066 
4067    You can call MatGetInfo() to get information on how effective the preallocation was;
4068    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4069    You can also run with the option -info and look for messages with the string
4070    malloc in them to see if additional memory allocation was needed.
4071 
4072    Example usage:
4073 
4074    Consider the following 8x8 matrix with 34 non-zero values, that is
4075    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4076    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4077    as follows:
4078 
4079 .vb
4080             1  2  0  |  0  3  0  |  0  4
4081     Proc0   0  5  6  |  7  0  0  |  8  0
4082             9  0 10  | 11  0  0  | 12  0
4083     -------------------------------------
4084            13  0 14  | 15 16 17  |  0  0
4085     Proc1   0 18  0  | 19 20 21  |  0  0
4086             0  0  0  | 22 23  0  | 24  0
4087     -------------------------------------
4088     Proc2  25 26 27  |  0  0 28  | 29  0
4089            30  0  0  | 31 32 33  |  0 34
4090 .ve
4091 
4092    This can be represented as a collection of submatrices as:
4093 
4094 .vb
4095       A B C
4096       D E F
4097       G H I
4098 .ve
4099 
4100    Where the submatrices A,B,C are owned by proc0, D,E,F are
4101    owned by proc1, G,H,I are owned by proc2.
4102 
4103    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4104    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4105    The 'M','N' parameters are 8,8, and have the same values on all procs.
4106 
4107    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4108    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4109    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4110    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4111    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4112    matrix, ans [DF] as another SeqAIJ matrix.
4113 
4114    When d_nz, o_nz parameters are specified, d_nz storage elements are
4115    allocated for every row of the local diagonal submatrix, and o_nz
4116    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4117    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
4118    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4119    In this case, the values of d_nz,o_nz are:
4120 .vb
4121      proc0 : d_nz = 2, o_nz = 2
4122      proc1 : d_nz = 3, o_nz = 2
4123      proc2 : d_nz = 1, o_nz = 4
4124 .ve
4125    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4126    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4127    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4128    34 values.
4129 
4130    When d_nnz, o_nnz parameters are specified, the storage is specified
4131    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4132    In the above case the values for d_nnz,o_nnz are:
4133 .vb
4134      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4135      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4136      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4137 .ve
4138    Here the space allocated is the sum of all the above values, i.e. 34, and
4139    hence pre-allocation is perfect.
4140 
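   As a sketch, the corresponding call on proc0 in the example above would be (B is assumed to
   already have its sizes set and its type set to MATMPIAIJ); proc1 and proc2 make the same call
   with their own d_nnz and o_nnz arrays:

.vb
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
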
4141    Level: intermediate
4142 
4143 .keywords: matrix, aij, compressed row, sparse, parallel
4144 
4145 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4146           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4147 @*/
4148 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4149 {
4150   PetscErrorCode ierr;
4151 
4152   PetscFunctionBegin;
4153   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4154   PetscValidType(B,1);
4155   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4156   PetscFunctionReturn(0);
4157 }
4158 
4159 /*@
4160      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
4161          in standard CSR format.
4162 
4163    Collective on MPI_Comm
4164 
4165    Input Parameters:
4166 +  comm - MPI communicator
4167 .  m - number of local rows (Cannot be PETSC_DECIDE)
4168 .  n - This value should be the same as the local size used in creating the
4169        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
4170        calculated if N is given). For square matrices n is almost always m.
4171 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4172 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4173 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4174 .   j - column indices
4175 -   a - matrix values
4176 
4177    Output Parameter:
4178 .   mat - the matrix
4179 
4180    Level: intermediate
4181 
4182    Notes:
4183        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4184      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4185      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4186 
4187        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4188 
4189        The format used for the sparse matrix input is equivalent to a
4190     row-major ordering, i.e. for the following matrix, the input data expected is
4191     as shown
4192 
4193 $        1 0 0
4194 $        2 0 3     P0
4195 $       -------
4196 $        4 5 6     P1
4197 $
4198 $     Process0 [P0]: rows_owned=[0,1]
4199 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4200 $        j =  {0,0,2}  [size = 3]
4201 $        v =  {1,2,3}  [size = 3]
4202 $
4203 $     Process1 [P1]: rows_owned=[2]
4204 $        i =  {0,3}    [size = nrow+1  = 1+1]
4205 $        j =  {0,1,2}  [size = 3]
4206 $        v =  {4,5,6}  [size = 3]
4207 
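       A minimal sketch of the call made by process 0 in the example above (comm is the communicator
    the matrix lives on; the local column size and global row count are left for PETSc to determine):

$     PetscInt    i[] = {0,1,3};
$     PetscInt    j[] = {0,0,2};
$     PetscScalar a[] = {1.0,2.0,3.0};
$     ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,a,&A);CHKERRQ(ierr);
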
4208 .keywords: matrix, aij, compressed row, sparse, parallel
4209 
4210 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4211           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4212 @*/
4213 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4214 {
4215   PetscErrorCode ierr;
4216 
4217   PetscFunctionBegin;
4218   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4219   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4220   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4221   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4222   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4223   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4224   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4225   PetscFunctionReturn(0);
4226 }
4227 
4228 /*@C
4229    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4230    (the default parallel PETSc format).  For good matrix assembly performance
4231    the user should preallocate the matrix storage by setting the parameters
4232    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4233    performance can be increased by more than a factor of 50.
4234 
4235    Collective on MPI_Comm
4236 
4237    Input Parameters:
4238 +  comm - MPI communicator
4239 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4240            This value should be the same as the local size used in creating the
4241            y vector for the matrix-vector product y = Ax.
4242 .  n - This value should be the same as the local size used in creating the
4243        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
4244        calculated if N is given). For square matrices n is almost always m.
4245 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4246 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4247 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4248            (same value is used for all local rows)
4249 .  d_nnz - array containing the number of nonzeros in the various rows of the
4250            DIAGONAL portion of the local submatrix (possibly different for each row)
4251            or NULL, if d_nz is used to specify the nonzero structure.
4252            The size of this array is equal to the number of local rows, i.e 'm'.
4253 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4254            submatrix (same value is used for all local rows).
4255 -  o_nnz - array containing the number of nonzeros in the various rows of the
4256            OFF-DIAGONAL portion of the local submatrix (possibly different for
4257            each row) or NULL, if o_nz is used to specify the nonzero
4258            structure. The size of this array is equal to the number
4259            of local rows, i.e 'm'.
4260 
4261    Output Parameter:
4262 .  A - the matrix
4263 
4264    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4265    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4266    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4267 
4268    Notes:
4269    If the *_nnz parameter is given then the *_nz parameter is ignored
4270 
4271    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4272    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4273    storage requirements for this matrix.
4274 
4275    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4276    processor than it must be used on all processors that share the object for
4277    that argument.
4278 
4279    The user MUST specify either the local or global matrix dimensions
4280    (possibly both).
4281 
4282    The parallel matrix is partitioned across processors such that the
4283    first m0 rows belong to process 0, the next m1 rows belong to
4284    process 1, the next m2 rows belong to process 2 etc., where
4285    m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
4286    values corresponding to an [m x N] submatrix.
4287 
4288    The columns are logically partitioned with the n0 columns belonging
4289    to the 0th partition, the next n1 columns belonging to the next
4290    partition etc., where n0,n1,n2... are the input parameter 'n'.
4291 
4292    The DIAGONAL portion of the local submatrix on any given processor
4293    is the submatrix formed by the rows and columns m,n owned by
4294    the given processor, i.e. the diagonal matrix on
4295    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4296    etc. The remaining portion of the local submatrix [m x (N-n)]
4297    constitutes the OFF-DIAGONAL portion. The example below better
4298    illustrates this concept.
4299 
4300    For a square global matrix we define each processor's diagonal portion
4301    to be its local rows and the corresponding columns (a square submatrix);
4302    each processor's off-diagonal portion encompasses the remainder of the
4303    local matrix (a rectangular submatrix).
4304 
4305    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4306 
4307    When calling this routine with a single process communicator, a matrix of
4308    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4309    type of communicator, use the construction mechanism
4314 $     MatCreate(...,&A);
4315 $     MatSetType(A,MATMPIAIJ);
4316 $     MatSetSizes(A, m,n,M,N);
4317 $     MatMPIAIJSetPreallocation(A,...);
4318 
4319    By default, this format uses inodes (identical nodes) when possible.
4320    We search for consecutive rows with the same nonzero structure, thereby
4321    reusing matrix information to achieve increased efficiency.
4322 
4323    Options Database Keys:
4324 +  -mat_no_inode  - Do not use inodes
4325 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4326 
4327 
4328 
4329    Example usage:
4330 
4331    Consider the following 8x8 matrix with 34 non-zero values, that is
4332    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4333    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4334    as follows
4335 
4336 .vb
4337             1  2  0  |  0  3  0  |  0  4
4338     Proc0   0  5  6  |  7  0  0  |  8  0
4339             9  0 10  | 11  0  0  | 12  0
4340     -------------------------------------
4341            13  0 14  | 15 16 17  |  0  0
4342     Proc1   0 18  0  | 19 20 21  |  0  0
4343             0  0  0  | 22 23  0  | 24  0
4344     -------------------------------------
4345     Proc2  25 26 27  |  0  0 28  | 29  0
4346            30  0  0  | 31 32 33  |  0 34
4347 .ve
4348 
4349    This can be represented as a collection of submatrices as
4350 
4351 .vb
4352       A B C
4353       D E F
4354       G H I
4355 .ve
4356 
4357    Where the submatrices A,B,C are owned by proc0, D,E,F are
4358    owned by proc1, G,H,I are owned by proc2.
4359 
4360    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4361    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4362    The 'M','N' parameters are 8,8, and have the same values on all procs.
4363 
4364    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4365    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4366    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4367    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4368    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4369    matrix, and [DF] as another SeqAIJ matrix.
4370 
4371    When d_nz, o_nz parameters are specified, d_nz storage elements are
4372    allocated for every row of the local diagonal submatrix, and o_nz
4373    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4374    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
4375    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4376    In this case, the values of d_nz,o_nz are
4377 .vb
4378      proc0 : d_nz = 2, o_nz = 2
4379      proc1 : d_nz = 3, o_nz = 2
4380      proc2 : d_nz = 1, o_nz = 4
4381 .ve
4382    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4383    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4384    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4385    34 values.
4386 
4387    When d_nnz, o_nnz parameters are specified, the storage is specified
4388    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4389    In the above case the values for d_nnz,o_nnz are
4390 .vb
4391      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4392      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4393      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4394 .ve
4395    Here the space allocated is the sum of all the above values, i.e. 34, and
4396    hence pre-allocation is perfect.
4397 
4398    Level: intermediate
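   Putting the example together, a sketch of the call proc0 would make is shown below; proc1 and
   proc2 make analogous calls with their own m, n, d_nnz, and o_nnz values:

.vb
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
     ierr = MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
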
4399 
4400 .keywords: matrix, aij, compressed row, sparse, parallel
4401 
4402 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4403           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4404 @*/
4405 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4406 {
4407   PetscErrorCode ierr;
4408   PetscMPIInt    size;
4409 
4410   PetscFunctionBegin;
4411   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4412   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4413   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4414   if (size > 1) {
4415     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4416     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4417   } else {
4418     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4419     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4420   }
4421   PetscFunctionReturn(0);
4422 }
4423 
4424 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4425 {
4426   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4427   PetscBool      flg;
4428   PetscErrorCode ierr;
4429 
4430   PetscFunctionBegin;
4431   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4432   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4433   if (Ad)     *Ad     = a->A;
4434   if (Ao)     *Ao     = a->B;
4435   if (colmap) *colmap = a->garray;
4436   PetscFunctionReturn(0);
4437 }
4438 
4439 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4440 {
4441   PetscErrorCode ierr;
4442   PetscInt       m,N,i,rstart,nnz,Ii;
4443   PetscInt       *indx;
4444   PetscScalar    *values;
4445 
4446   PetscFunctionBegin;
4447   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4448   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4449     PetscInt       *dnz,*onz,sum,bs,cbs;
4450 
4451     if (n == PETSC_DECIDE) {
4452       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4453     }
4454     /* Check sum(n) = N */
4455     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4456     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4457 
4458     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4459     rstart -= m;
4460 
4461     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4462     for (i=0; i<m; i++) {
4463       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4464       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4465       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4466     }
4467 
4468     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4469     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4470     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4471     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4472     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4473     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4474     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4475     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4476   }
4477 
4478   /* numeric phase */
4479   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4480   for (i=0; i<m; i++) {
4481     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4482     Ii   = i + rstart;
4483     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4484     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4485   }
4486   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4487   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4488   PetscFunctionReturn(0);
4489 }
4490 
4491 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4492 {
4493   PetscErrorCode    ierr;
4494   PetscMPIInt       rank;
4495   PetscInt          m,N,i,rstart,nnz;
4496   size_t            len;
4497   const PetscInt    *indx;
4498   PetscViewer       out;
4499   char              *name;
4500   Mat               B;
4501   const PetscScalar *values;
4502 
4503   PetscFunctionBegin;
4504   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4505   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4506   /* Should this be the type of the diagonal block of A? */
4507   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4508   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4509   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4510   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4511   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4512   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4513   for (i=0; i<m; i++) {
4514     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4515     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4516     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4517   }
4518   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4519   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4520 
4521   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4522   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4523   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4524   sprintf(name,"%s.%d",outfile,rank);
4525   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4526   ierr = PetscFree(name);CHKERRQ(ierr);
4527   ierr = MatView(B,out);CHKERRQ(ierr);
4528   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4529   ierr = MatDestroy(&B);CHKERRQ(ierr);
4530   PetscFunctionReturn(0);
4531 }
4532 
4533 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4534 {
4535   PetscErrorCode      ierr;
4536   Mat_Merge_SeqsToMPI *merge;
4537   PetscContainer      container;
4538 
4539   PetscFunctionBegin;
4540   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4541   if (container) {
4542     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4543     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4544     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4545     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4546     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4547     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4548     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4549     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4550     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4551     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4552     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4553     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4554     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4555     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4556     ierr = PetscFree(merge);CHKERRQ(ierr);
4557     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4558   }
4559   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4560   PetscFunctionReturn(0);
4561 }
4562 
4563 #include <../src/mat/utils/freespace.h>
4564 #include <petscbt.h>
4565 
4566 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4567 {
4568   PetscErrorCode      ierr;
4569   MPI_Comm            comm;
4570   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4571   PetscMPIInt         size,rank,taga,*len_s;
4572   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4573   PetscInt            proc,m;
4574   PetscInt            **buf_ri,**buf_rj;
4575   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4576   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4577   MPI_Request         *s_waits,*r_waits;
4578   MPI_Status          *status;
4579   MatScalar           *aa=a->a;
4580   MatScalar           **abuf_r,*ba_i;
4581   Mat_Merge_SeqsToMPI *merge;
4582   PetscContainer      container;
4583 
4584   PetscFunctionBegin;
4585   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4586   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4587 
4588   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4589   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4590 
4591   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4592   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4593 
4594   bi     = merge->bi;
4595   bj     = merge->bj;
4596   buf_ri = merge->buf_ri;
4597   buf_rj = merge->buf_rj;
4598 
4599   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4600   owners = merge->rowmap->range;
4601   len_s  = merge->len_s;
4602 
4603   /* send and recv matrix values */
4604   /*-----------------------------*/
4605   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4606   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4607 
4608   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4609   for (proc=0,k=0; proc<size; proc++) {
4610     if (!len_s[proc]) continue;
4611     i    = owners[proc];
4612     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4613     k++;
4614   }
4615 
4616   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4617   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4618   ierr = PetscFree(status);CHKERRQ(ierr);
4619 
4620   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4621   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4622 
4623   /* insert mat values of mpimat */
4624   /*----------------------------*/
4625   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4626   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4627 
4628   for (k=0; k<merge->nrecv; k++) {
4629     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4630     nrows       = *(buf_ri_k[k]);
4631     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4632     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4633   }
4634 
4635   /* set values of ba */
4636   m = merge->rowmap->n;
4637   for (i=0; i<m; i++) {
4638     arow = owners[rank] + i;
4639     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4640     bnzi = bi[i+1] - bi[i];
4641     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4642 
4643     /* add local non-zero vals of this proc's seqmat into ba */
4644     anzi   = ai[arow+1] - ai[arow];
4645     aj     = a->j + ai[arow];
4646     aa     = a->a + ai[arow];
4647     nextaj = 0;
4648     for (j=0; nextaj<anzi; j++) {
4649       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4650         ba_i[j] += aa[nextaj++];
4651       }
4652     }
4653 
4654     /* add received vals into ba */
4655     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4656       /* i-th row */
4657       if (i == *nextrow[k]) {
4658         anzi   = *(nextai[k]+1) - *nextai[k];
4659         aj     = buf_rj[k] + *(nextai[k]);
4660         aa     = abuf_r[k] + *(nextai[k]);
4661         nextaj = 0;
4662         for (j=0; nextaj<anzi; j++) {
4663           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4664             ba_i[j] += aa[nextaj++];
4665           }
4666         }
4667         nextrow[k]++; nextai[k]++;
4668       }
4669     }
4670     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4671   }
4672   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4673   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4674 
4675   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4676   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4677   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4678   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4679   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4680   PetscFunctionReturn(0);
4681 }
4682 
4683 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4684 {
4685   PetscErrorCode      ierr;
4686   Mat                 B_mpi;
4687   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4688   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4689   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4690   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4691   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4692   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4693   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4694   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4695   MPI_Status          *status;
4696   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4697   PetscBT             lnkbt;
4698   Mat_Merge_SeqsToMPI *merge;
4699   PetscContainer      container;
4700 
4701   PetscFunctionBegin;
4702   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4703 
4704   /* make sure it is a PETSc comm */
4705   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4706   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4707   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4708 
4709   ierr = PetscNew(&merge);CHKERRQ(ierr);
4710   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4711 
4712   /* determine row ownership */
4713   /*---------------------------------------------------------*/
4714   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4715   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4716   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4717   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4718   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4719   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4720   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4721 
4722   m      = merge->rowmap->n;
4723   owners = merge->rowmap->range;
4724 
4725   /* determine the number of messages to send, their lengths */
4726   /*---------------------------------------------------------*/
4727   len_s = merge->len_s;
4728 
4729   len          = 0; /* length of buf_si[] */
4730   merge->nsend = 0;
4731   for (proc=0; proc<size; proc++) {
4732     len_si[proc] = 0;
4733     if (proc == rank) {
4734       len_s[proc] = 0;
4735     } else {
4736       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4737       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4738     }
4739     if (len_s[proc]) {
4740       merge->nsend++;
4741       nrows = 0;
4742       for (i=owners[proc]; i<owners[proc+1]; i++) {
4743         if (ai[i+1] > ai[i]) nrows++;
4744       }
4745       len_si[proc] = 2*(nrows+1);
4746       len         += len_si[proc];
4747     }
4748   }
4749 
4750   /* determine the number and length of messages to receive for ij-structure */
4751   /*-------------------------------------------------------------------------*/
4752   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4753   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4754 
4755   /* post the Irecv of j-structure */
4756   /*-------------------------------*/
4757   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4758   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4759 
4760   /* post the Isend of j-structure */
4761   /*--------------------------------*/
4762   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4763 
4764   for (proc=0, k=0; proc<size; proc++) {
4765     if (!len_s[proc]) continue;
4766     i    = owners[proc];
4767     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4768     k++;
4769   }
4770 
4771   /* receives and sends of j-structure are complete */
4772   /*------------------------------------------------*/
4773   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4774   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4775 
4776   /* send and recv i-structure */
4777   /*---------------------------*/
4778   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4779   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4780 
4781   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4782   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4783   for (proc=0,k=0; proc<size; proc++) {
4784     if (!len_s[proc]) continue;
4785     /* form outgoing message for i-structure:
4786          buf_si[0]:                 nrows to be sent
4787                [1:nrows]:           row index (global)
4788                [nrows+1:2*nrows+1]: i-structure index
4789     */
4790     /*-------------------------------------------*/
4791     nrows       = len_si[proc]/2 - 1;
4792     buf_si_i    = buf_si + nrows+1;
4793     buf_si[0]   = nrows;
4794     buf_si_i[0] = 0;
4795     nrows       = 0;
4796     for (i=owners[proc]; i<owners[proc+1]; i++) {
4797       anzi = ai[i+1] - ai[i];
4798       if (anzi) {
4799         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4800         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4801         nrows++;
4802       }
4803     }
4804     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4805     k++;
4806     buf_si += len_si[proc];
4807   }
4808 
4809   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4810   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4811 
4812   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4813   for (i=0; i<merge->nrecv; i++) {
4814     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4815   }
4816 
4817   ierr = PetscFree(len_si);CHKERRQ(ierr);
4818   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4819   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4820   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4821   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4822   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4823   ierr = PetscFree(status);CHKERRQ(ierr);
4824 
4825   /* compute a local seq matrix in each processor */
4826   /*----------------------------------------------*/
4827   /* allocate bi array and free space for accumulating nonzero column info */
4828   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4829   bi[0] = 0;
4830 
4831   /* create and initialize a linked list */
4832   nlnk = N+1;
4833   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4834 
4835   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4836   len  = ai[owners[rank+1]] - ai[owners[rank]];
4837   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4838 
4839   current_space = free_space;
4840 
4841   /* determine symbolic info for each local row */
4842   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4843 
4844   for (k=0; k<merge->nrecv; k++) {
4845     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4846     nrows       = *buf_ri_k[k];
4847     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4848     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4849   }
4850 
4851   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4852   len  = 0;
4853   for (i=0; i<m; i++) {
4854     bnzi = 0;
4855     /* add local non-zero cols of this proc's seqmat into lnk */
4856     arow  = owners[rank] + i;
4857     anzi  = ai[arow+1] - ai[arow];
4858     aj    = a->j + ai[arow];
4859     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4860     bnzi += nlnk;
4861     /* add received col data into lnk */
4862     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4863       if (i == *nextrow[k]) { /* i-th row */
4864         anzi  = *(nextai[k]+1) - *nextai[k];
4865         aj    = buf_rj[k] + *nextai[k];
4866         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4867         bnzi += nlnk;
4868         nextrow[k]++; nextai[k]++;
4869       }
4870     }
4871     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4872 
4873     /* if free space is not available, make more free space */
4874     if (current_space->local_remaining<bnzi) {
4875       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4876       nspacedouble++;
4877     }
4878     /* copy data into free space, then initialize lnk */
4879     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4880     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4881 
4882     current_space->array           += bnzi;
4883     current_space->local_used      += bnzi;
4884     current_space->local_remaining -= bnzi;
4885 
4886     bi[i+1] = bi[i] + bnzi;
4887   }
4888 
4889   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4890 
4891   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4892   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4893   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4894 
4895   /* create symbolic parallel matrix B_mpi */
4896   /*---------------------------------------*/
4897   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4898   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4899   if (n==PETSC_DECIDE) {
4900     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4901   } else {
4902     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4903   }
4904   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4905   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4906   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4907   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4908   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4909 
4910   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4911   B_mpi->assembled    = PETSC_FALSE;
4912   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4913   merge->bi           = bi;
4914   merge->bj           = bj;
4915   merge->buf_ri       = buf_ri;
4916   merge->buf_rj       = buf_rj;
4917   merge->coi          = NULL;
4918   merge->coj          = NULL;
4919   merge->owners_co    = NULL;
4920 
4921   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4922 
4923   /* attach the supporting struct to B_mpi for reuse */
4924   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4925   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4926   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4927   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4928   *mpimat = B_mpi;
4929 
4930   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4931   PetscFunctionReturn(0);
4932 }
4933 
4934 /*@C
4935       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4936                  matrices from each processor
4937 
4938     Collective on MPI_Comm
4939 
4940    Input Parameters:
4941 +    comm - the communicator the parallel matrix will live on
4942 .    seqmat - the input sequential matrix (one per process)
4943 .    m - number of local rows (or PETSC_DECIDE)
4944 .    n - number of local columns (or PETSC_DECIDE)
4945 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4946 
4947    Output Parameter:
4948 .    mpimat - the parallel matrix generated
4949 
4950     Level: advanced
4951 
4952    Notes:
4953      The dimensions of the sequential matrix in each processor MUST be the same.
4954      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4955      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4956 @*/
4957 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4958 {
4959   PetscErrorCode ierr;
4960   PetscMPIInt    size;
4961 
4962   PetscFunctionBegin;
4963   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4964   if (size == 1) {
4965     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4966     if (scall == MAT_INITIAL_MATRIX) {
4967       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4968     } else {
4969       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4970     }
4971     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4972     PetscFunctionReturn(0);
4973   }
4974   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4975   if (scall == MAT_INITIAL_MATRIX) {
4976     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4977   }
4978   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4979   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4980   PetscFunctionReturn(0);
4981 }
4982 
4983 /*@
4984      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4985           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4986           with MatGetSize()
4987 
4988     Not Collective
4989 
4990    Input Parameters:
4991 +    A - the matrix
4992 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4993 
4994    Output Parameter:
4995 .    A_loc - the local sequential matrix generated
4996 
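    A usage sketch (A is assumed to be an assembled MATMPIAIJ matrix):

.vb
      Mat A_loc;
      ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
      /* ... use A_loc; after the values of A change, refresh it in place ... */
      ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
      ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
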
4997     Level: developer
4998 
4999 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5000 
5001 @*/
5002 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5003 {
5004   PetscErrorCode ierr;
5005   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5006   Mat_SeqAIJ     *mat,*a,*b;
5007   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5008   MatScalar      *aa,*ba,*cam;
5009   PetscScalar    *ca;
5010   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5011   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5012   PetscBool      match;
5013   MPI_Comm       comm;
5014   PetscMPIInt    size;
5015 
5016   PetscFunctionBegin;
5017   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5018   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5019   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5020   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5021   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
5022 
5023   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5024   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5025   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5026   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5027   aa = a->a; ba = b->a;
5028   if (scall == MAT_INITIAL_MATRIX) {
5029     if (size == 1) {
5030       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
5031       PetscFunctionReturn(0);
5032     }
5033 
5034     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5035     ci[0] = 0;
5036     for (i=0; i<am; i++) {
5037       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5038     }
5039     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5040     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5041     k    = 0;
5042     for (i=0; i<am; i++) {
5043       ncols_o = bi[i+1] - bi[i];
5044       ncols_d = ai[i+1] - ai[i];
5045       /* off-diagonal portion of A: columns to the left of the diagonal block */
5046       for (jo=0; jo<ncols_o; jo++) {
5047         col = cmap[*bj];
5048         if (col >= cstart) break;
5049         cj[k]   = col; bj++;
5050         ca[k++] = *ba++;
5051       }
5052       /* diagonal portion of A */
5053       for (j=0; j<ncols_d; j++) {
5054         cj[k]   = cstart + *aj++;
5055         ca[k++] = *aa++;
5056       }
5057       /* off-diagonal portion of A: columns to the right of the diagonal block */
5058       for (j=jo; j<ncols_o; j++) {
5059         cj[k]   = cmap[*bj++];
5060         ca[k++] = *ba++;
5061       }
5062     }
5063     /* put together the new matrix */
5064     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5065     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5066     /* Since these are PETSc arrays, change flags to free them as necessary. */
5067     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5068     mat->free_a  = PETSC_TRUE;
5069     mat->free_ij = PETSC_TRUE;
5070     mat->nonew   = 0;
5071   } else if (scall == MAT_REUSE_MATRIX) {
5072     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5073     ci = mat->i; cj = mat->j; cam = mat->a;
5074     for (i=0; i<am; i++) {
5075       /* off-diagonal portion of A: columns to the left of the diagonal block */
5076       ncols_o = bi[i+1] - bi[i];
5077       for (jo=0; jo<ncols_o; jo++) {
5078         col = cmap[*bj];
5079         if (col >= cstart) break;
5080         *cam++ = *ba++; bj++;
5081       }
5082       /* diagonal portion of A */
5083       ncols_d = ai[i+1] - ai[i];
5084       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5085       /* off-diagonal portion of A: columns to the right of the diagonal block */
5086       for (j=jo; j<ncols_o; j++) {
5087         *cam++ = *ba++; bj++;
5088       }
5089     }
5090   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5091   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5092   PetscFunctionReturn(0);
5093 }
5094 
5095 /*@C
5096      MatMPIAIJGetLocalMatCondensed - Creates a MATSEQAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and its NON-ZERO columns
5097 
5098     Not Collective
5099 
5100    Input Parameters:
5101 +    A - the matrix
5102 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5103 -    row, col - index sets of rows and columns to extract (or NULL)
5104 
5105    Output Parameter:
5106 .    A_loc - the local sequential matrix generated
5107 
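   Example usage (a minimal sketch; assumes A is an assembled MATMPIAIJ matrix, row and col are passed as NULL
   so that all local rows and all nonzero columns are taken, and error checking with CHKERRQ() is omitted for brevity):
.vb
   Mat A_loc;
   MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);
   ...  work with the condensed sequential matrix A_loc  ...
   MatDestroy(&A_loc);
.ve
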
5108     Level: developer
5109 
5110 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5111 
5112 @*/
5113 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5114 {
5115   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5116   PetscErrorCode ierr;
5117   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5118   IS             isrowa,iscola;
5119   Mat            *aloc;
5120   PetscBool      match;
5121 
5122   PetscFunctionBegin;
5123   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5124   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5125   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5126   if (!row) {
5127     start = A->rmap->rstart; end = A->rmap->rend;
5128     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5129   } else {
5130     isrowa = *row;
5131   }
5132   if (!col) {
5133     start = A->cmap->rstart;
5134     cmap  = a->garray;
5135     nzA   = a->A->cmap->n;
5136     nzB   = a->B->cmap->n;
5137     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5138     ncols = 0;
5139     for (i=0; i<nzB; i++) {
5140       if (cmap[i] < start) idx[ncols++] = cmap[i];
5141       else break;
5142     }
5143     imark = i;
5144     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5145     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5146     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5147   } else {
5148     iscola = *col;
5149   }
5150   if (scall != MAT_INITIAL_MATRIX) {
5151     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5152     aloc[0] = *A_loc;
5153   }
5154   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5155   if (!col) { /* attach global id of condensed columns */
5156     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5157   }
5158   *A_loc = aloc[0];
5159   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5160   if (!row) {
5161     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5162   }
5163   if (!col) {
5164     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5165   }
5166   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5167   PetscFunctionReturn(0);
5168 }
5169 
5170 /*@C
5171     MatGetBrowsOfAcols - Creates a MATSEQAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
5172 
5173     Collective on Mat
5174 
5175    Input Parameters:
5176 +    A,B - the matrices in mpiaij format
5177 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5178 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5179 
5180    Output Parameters:
5181 +    rowb, colb - index sets of rows and columns of B to extract
5182 -    B_seq - the sequential matrix generated
5183 
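   Example usage (a minimal sketch; assumes A and B are assembled MATMPIAIJ matrices with compatible layouts
   and error checking with CHKERRQ() is omitted for brevity):
.vb
   IS  rowb = NULL,colb = NULL;
   Mat B_seq;
   MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);
   ...  use B_seq; rowb and colb record which rows and columns of B were extracted  ...
   MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);
   ISDestroy(&rowb);
   ISDestroy(&colb);
   MatDestroy(&B_seq);
.ve
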
5184     Level: developer
5185 
5186 @*/
5187 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5188 {
5189   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5190   PetscErrorCode ierr;
5191   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5192   IS             isrowb,iscolb;
5193   Mat            *bseq=NULL;
5194 
5195   PetscFunctionBegin;
5196   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5197     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5198   }
5199   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5200 
5201   if (scall == MAT_INITIAL_MATRIX) {
5202     start = A->cmap->rstart;
5203     cmap  = a->garray;
5204     nzA   = a->A->cmap->n;
5205     nzB   = a->B->cmap->n;
5206     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5207     ncols = 0;
5208     for (i=0; i<nzB; i++) {  /* global columns of A (rows of B) before the local diagonal block */
5209       if (cmap[i] < start) idx[ncols++] = cmap[i];
5210       else break;
5211     }
5212     imark = i;
5213     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5214     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* global columns of A (rows of B) after the local diagonal block */
5215     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5216     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5217   } else {
5218     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5219     isrowb  = *rowb; iscolb = *colb;
5220     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5221     bseq[0] = *B_seq;
5222   }
5223   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5224   *B_seq = bseq[0];
5225   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5226   if (!rowb) {
5227     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5228   } else {
5229     *rowb = isrowb;
5230   }
5231   if (!colb) {
5232     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5233   } else {
5234     *colb = iscolb;
5235   }
5236   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5237   PetscFunctionReturn(0);
5238 }
5239 
5240 #include <petsc/private/vecscatterimpl.h>
5241 /*
5242     MatGetBrowsOfAoCols_MPIAIJ - Creates a MATSEQAIJ matrix by taking the rows of B that correspond to the nonzero columns
5243     of the OFF-DIAGONAL portion of the local part of A
5244 
5245     Collective on Mat
5246 
5247    Input Parameters:
5248 +    A,B - the matrices in mpiaij format
5249 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5250 
5251    Output Parameters:
5252 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5253 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5254 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5255 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5256 
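    Example usage (a minimal sketch of how the Mat-Mat multiplication code is expected to call this routine;
    A and B are assembled MATMPIAIJ matrices, the remaining names are local variables of the caller, and
    error checking is omitted for brevity):

       PetscInt  *startsj_s = NULL,*startsj_r = NULL;
       MatScalar *bufa = NULL;
       Mat       B_oth;
       MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);
       ...  use B_oth; keep startsj_s, startsj_r and bufa for a later MAT_REUSE_MATRIX call  ...
       MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);
       ...  the caller eventually frees startsj_s, startsj_r and bufa, and destroys B_oth  ...
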
5257     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5258      for this matrix. This is not desirable.
5259 
5260     Level: developer
5261 
5262 */
5263 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5264 {
5265   VecScatter_MPI_General *gen_to,*gen_from;
5266   PetscErrorCode         ierr;
5267   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5268   Mat_SeqAIJ             *b_oth;
5269   VecScatter             ctx;
5270   MPI_Comm               comm;
5271   PetscMPIInt            *rprocs,*sprocs,tag,rank;
5272   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5273   PetscInt               *rvalues,*svalues,*cols,sbs,rbs;
5274   PetscScalar              *b_otha,*bufa,*bufA,*vals;
5275   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5276   MPI_Request            *rwaits = NULL,*swaits = NULL;
5277   MPI_Status             *sstatus,rstatus;
5278   PetscMPIInt            jj,size;
5279   VecScatterType         type;
5280   PetscBool              mpi1;
5281 
5282   PetscFunctionBegin;
5283   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5284   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5285 
5286   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5287     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5288   }
5289   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5290   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5291 
5292   if (size == 1) {
5293     startsj_s = NULL;
5294     bufa_ptr  = NULL;
5295     *B_oth    = NULL;
5296     PetscFunctionReturn(0);
5297   }
5298 
5299   ctx = a->Mvctx;
5300   ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr);
5301   ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr);
5302   if (!mpi1) {
5303     /* a->Mvctx is not of type MPI1, but these Mat-Mat ops are only implemented for MPI1 scatters,
5304      thus create a->Mvctx_mpi1 */
5305     if (!a->Mvctx_mpi1) {
5306       a->Mvctx_mpi1_flg = PETSC_TRUE;
5307       ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr);
5308     }
5309     ctx = a->Mvctx_mpi1;
5310   }
5311   tag = ((PetscObject)ctx)->tag;
5312 
5313   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5314   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5315   nrecvs   = gen_from->n;
5316   nsends   = gen_to->n;
5317 
5318   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5319   srow    = gen_to->indices;    /* local row index to be sent */
5320   sstarts = gen_to->starts;
5321   sprocs  = gen_to->procs;
5322   sstatus = gen_to->sstatus;
5323   sbs     = gen_to->bs;
5324   rstarts = gen_from->starts;
5325   rprocs  = gen_from->procs;
5326   rbs     = gen_from->bs;
5327 
5328   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5329   if (scall == MAT_INITIAL_MATRIX) {
5330     /* i-array */
5331     /*---------*/
5332     /*  post receives */
5333     ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
5334     for (i=0; i<nrecvs; i++) {
5335       rowlen = rvalues + rstarts[i]*rbs;
5336       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5337       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5338     }
5339 
5340     /* pack the outgoing message */
5341     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5342 
5343     sstartsj[0] = 0;
5344     rstartsj[0] = 0;
5345     len         = 0; /* total length of j or a array to be sent */
5346     k           = 0;
5347     ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
5348     for (i=0; i<nsends; i++) {
5349       rowlen = svalues + sstarts[i]*sbs;
5350       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5351       for (j=0; j<nrows; j++) {
5352         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5353         for (l=0; l<sbs; l++) {
5354           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5355 
5356           rowlen[j*sbs+l] = ncols;
5357 
5358           len += ncols;
5359           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5360         }
5361         k++;
5362       }
5363       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5364 
5365       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5366     }
5367     /* recvs and sends of i-array are completed */
5368     i = nrecvs;
5369     while (i--) {
5370       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5371     }
5372     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5373     ierr = PetscFree(svalues);CHKERRQ(ierr);
5374 
5375     /* allocate buffers for sending j and a arrays */
5376     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5377     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5378 
5379     /* create i-array of B_oth */
5380     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5381 
5382     b_othi[0] = 0;
5383     len       = 0; /* total length of j or a array to be received */
5384     k         = 0;
5385     for (i=0; i<nrecvs; i++) {
5386       rowlen = rvalues + rstarts[i]*rbs;
5387       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5388       for (j=0; j<nrows; j++) {
5389         b_othi[k+1] = b_othi[k] + rowlen[j];
5390         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5391         k++;
5392       }
5393       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5394     }
5395     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5396 
5397     /* allocate space for j and a arrays of B_oth */
5398     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5399     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5400 
5401     /* j-array */
5402     /*---------*/
5403     /*  post receives of j-array */
5404     for (i=0; i<nrecvs; i++) {
5405       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5406       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5407     }
5408 
5409     /* pack the outgoing message j-array */
5410     k = 0;
5411     for (i=0; i<nsends; i++) {
5412       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5413       bufJ  = bufj+sstartsj[i];
5414       for (j=0; j<nrows; j++) {
5415         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5416         for (ll=0; ll<sbs; ll++) {
5417           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5418           for (l=0; l<ncols; l++) {
5419             *bufJ++ = cols[l];
5420           }
5421           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5422         }
5423       }
5424       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5425     }
5426 
5427     /* recvs and sends of j-array are completed */
5428     i = nrecvs;
5429     while (i--) {
5430       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5431     }
5432     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5433   } else if (scall == MAT_REUSE_MATRIX) {
5434     sstartsj = *startsj_s;
5435     rstartsj = *startsj_r;
5436     bufa     = *bufa_ptr;
5437     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5438     b_otha   = b_oth->a;
5439   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5440 
5441   /* a-array */
5442   /*---------*/
5443   /*  post receives of a-array */
5444   for (i=0; i<nrecvs; i++) {
5445     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5446     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5447   }
5448 
5449   /* pack the outgoing message a-array */
5450   k = 0;
5451   for (i=0; i<nsends; i++) {
5452     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5453     bufA  = bufa+sstartsj[i];
5454     for (j=0; j<nrows; j++) {
5455       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5456       for (ll=0; ll<sbs; ll++) {
5457         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5458         for (l=0; l<ncols; l++) {
5459           *bufA++ = vals[l];
5460         }
5461         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5462       }
5463     }
5464     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5465   }
5466   /* recvs and sends of a-array are completed */
5467   i = nrecvs;
5468   while (i--) {
5469     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5470   }
5471   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5472   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5473 
5474   if (scall == MAT_INITIAL_MATRIX) {
5475     /* put together the new matrix */
5476     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5477 
5478     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5479     /* Since these are PETSc arrays, change flags to free them as necessary. */
5480     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5481     b_oth->free_a  = PETSC_TRUE;
5482     b_oth->free_ij = PETSC_TRUE;
5483     b_oth->nonew   = 0;
5484 
5485     ierr = PetscFree(bufj);CHKERRQ(ierr);
5486     if (!startsj_s || !bufa_ptr) {
5487       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5488       ierr = PetscFree(bufa);CHKERRQ(ierr);
5489     } else {
5490       *startsj_s = sstartsj;
5491       *startsj_r = rstartsj;
5492       *bufa_ptr  = bufa;
5493     }
5494   }
5495   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5496   PetscFunctionReturn(0);
5497 }
5498 
5499 /*@C
5500   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5501 
5502   Not Collective
5503 
5504   Input Parameter:
5505 . A - The matrix in mpiaij format
5506 
5507   Output Parameters:
5508 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5509 . colmap - A map from global column index to local index into lvec
5510 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5511 
5512   Level: developer
5513 
5514 @*/
5515 #if defined(PETSC_USE_CTABLE)
5516 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5517 #else
5518 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5519 #endif
5520 {
5521   Mat_MPIAIJ *a;
5522 
5523   PetscFunctionBegin;
5524   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5525   PetscValidPointer(lvec, 2);
5526   PetscValidPointer(colmap, 3);
5527   PetscValidPointer(multScatter, 4);
5528   a = (Mat_MPIAIJ*) A->data;
5529   if (lvec) *lvec = a->lvec;
5530   if (colmap) *colmap = a->colmap;
5531   if (multScatter) *multScatter = a->Mvctx;
5532   PetscFunctionReturn(0);
5533 }
5534 
5535 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5536 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5537 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5538 #if defined(PETSC_HAVE_MKL_SPARSE)
5539 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5540 #endif
5541 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5542 #if defined(PETSC_HAVE_ELEMENTAL)
5543 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5544 #endif
5545 #if defined(PETSC_HAVE_HYPRE)
5546 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5547 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5548 #endif
5549 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5550 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5551 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5552 
5553 /*
5554     Computes C = A*B as (B'*A')' since computing the MPIDense*MPIAIJ product A*B directly is untenable
5555 
5556                n                       p                          p
5557         (              )       (              )         (                  )
5558       m (      A       )  *  n (       B      )   =   m (         C        )
5559         (              )       (              )         (                  )
5560 
5561 */
5562 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5563 {
5564   PetscErrorCode ierr;
5565   Mat            At,Bt,Ct;
5566 
5567   PetscFunctionBegin;
5568   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5569   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5570   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5571   ierr = MatDestroy(&At);CHKERRQ(ierr);
5572   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5573   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5574   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5575   PetscFunctionReturn(0);
5576 }
5577 
5578 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5579 {
5580   PetscErrorCode ierr;
5581   PetscInt       m=A->rmap->n,n=B->cmap->n;
5582   Mat            Cmat;
5583 
5584   PetscFunctionBegin;
5585   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5586   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5587   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5588   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5589   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5590   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5591   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5592   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5593 
5594   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5595 
5596   *C = Cmat;
5597   PetscFunctionReturn(0);
5598 }
5599 
5600 /* ----------------------------------------------------------------*/
5601 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5602 {
5603   PetscErrorCode ierr;
5604 
5605   PetscFunctionBegin;
5606   if (scall == MAT_INITIAL_MATRIX) {
5607     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5608     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5609     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5610   }
5611   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5612   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5613   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5614   PetscFunctionReturn(0);
5615 }
5616 
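/*
    Example (a minimal sketch of the user-level call that dispatches to the MPIDense*MPIAIJ routines above;
    assumes A is an assembled MATMPIDENSE matrix and B an assembled MATMPIAIJ matrix with compatible sizes,
    and error checking with CHKERRQ() is omitted for brevity):

       Mat C;
       MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);
       ...  reuse the same C after the values of A or B change  ...
       MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);
       MatDestroy(&C);
*/
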
5617 /*MC
5618    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5619 
5620    Options Database Keys:
5621 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5622 
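   Example usage (a minimal sketch; nlocal is the assumed number of local rows and columns, the preallocation
   numbers are placeholders, and error checking with CHKERRQ() is omitted for brevity):
.vb
   Mat A;
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,nlocal,nlocal,PETSC_DETERMINE,PETSC_DETERMINE);
   MatSetType(A,MATMPIAIJ);
   MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);
   ...  MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd()  ...
.ve
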
5623   Level: beginner
5624 
5625 .seealso: MatCreateAIJ()
5626 M*/
5627 
5628 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5629 {
5630   Mat_MPIAIJ     *b;
5631   PetscErrorCode ierr;
5632   PetscMPIInt    size;
5633 
5634   PetscFunctionBegin;
5635   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5636 
5637   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5638   B->data       = (void*)b;
5639   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5640   B->assembled  = PETSC_FALSE;
5641   B->insertmode = NOT_SET_VALUES;
5642   b->size       = size;
5643 
5644   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5645 
5646   /* build cache for off-processor entries formed during MatSetValues() */
5647   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5648 
5649   b->donotstash  = PETSC_FALSE;
5650   b->colmap      = 0;
5651   b->garray      = 0;
5652   b->roworiented = PETSC_TRUE;
5653 
5654   /* stuff used for matrix vector multiply */
5655   b->lvec  = NULL;
5656   b->Mvctx = NULL;
5657 
5658   /* stuff for MatGetRow() */
5659   b->rowindices   = 0;
5660   b->rowvalues    = 0;
5661   b->getrowactive = PETSC_FALSE;
5662 
5663   /* flexible pointer used in CUSP/CUSPARSE classes */
5664   b->spptr = NULL;
5665 
5666   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5667   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5668   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5669   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5670   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5671   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5672   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5673   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5674   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5675   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
5676 #if defined(PETSC_HAVE_MKL_SPARSE)
5677   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5678 #endif
5679   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5680   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5681 #if defined(PETSC_HAVE_ELEMENTAL)
5682   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5683 #endif
5684 #if defined(PETSC_HAVE_HYPRE)
5685   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5686 #endif
5687   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
5688   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5689   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5690   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5691   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5692 #if defined(PETSC_HAVE_HYPRE)
5693   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5694 #endif
5695   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
5696   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5697   PetscFunctionReturn(0);
5698 }
5699 
5700 /*@C
5701      MatCreateMPIAIJWithSplitArrays - creates an MPIAIJ matrix using arrays that contain the "diagonal"
5702          and "off-diagonal" parts of the matrix in CSR format.
5703 
5704    Collective on MPI_Comm
5705 
5706    Input Parameters:
5707 +  comm - MPI communicator
5708 .  m - number of local rows (Cannot be PETSC_DECIDE)
5709 .  n - This value should be the same as the local size used in creating the
5710        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5711        calculated if N is given). For square matrices n is almost always m.
5712 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5713 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5714 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
5715 .   j - column indices
5716 .   a - matrix values
5717 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
5718 .   oj - column indices
5719 -   oa - matrix values
5720 
5721    Output Parameter:
5722 .   mat - the matrix
5723 
5724    Level: advanced
5725 
5726    Notes:
5727        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5728        must free the arrays once the matrix has been destroyed and not before.
5729 
5730        The i and j indices are 0 based
5731 
5732        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5733 
5734        This sets local rows and cannot be used to set off-processor values.
5735 
5736        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5737        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5738        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5739        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5740        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5741        communication if it is known that only local entries will be set.
5742 
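   Example usage (a minimal sketch; di, dj, da and oi, oj, oa are assumed to already hold the local "diagonal"
   and "off-diagonal" blocks in CSR format, m and n are the local row and column counts, and error checking
   with CHKERRQ() is omitted for brevity):
.vb
   Mat A;
   MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,di,dj,da,oi,oj,oa,&A);
   ...  use A; the six arrays must remain valid until A is destroyed  ...
   MatDestroy(&A);
.ve
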
5743 .keywords: matrix, aij, compressed row, sparse, parallel
5744 
5745 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5746           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5747 @*/
5748 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5749 {
5750   PetscErrorCode ierr;
5751   Mat_MPIAIJ     *maij;
5752 
5753   PetscFunctionBegin;
5754   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5755   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5756   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5757   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5758   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5759   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5760   maij = (Mat_MPIAIJ*) (*mat)->data;
5761 
5762   (*mat)->preallocated = PETSC_TRUE;
5763 
5764   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5765   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5766 
5767   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5768   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5769 
5770   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5771   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5772   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5773   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5774 
5775   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5776   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5777   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5778   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5779   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5780   PetscFunctionReturn(0);
5781 }
5782 
5783 /*
5784     Special version for direct calls from Fortran
5785 */
5786 #include <petsc/private/fortranimpl.h>
5787 
5788 /* Change these macros so they can be used in a void function */
5789 #undef CHKERRQ
5790 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5791 #undef SETERRQ2
5792 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5793 #undef SETERRQ3
5794 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5795 #undef SETERRQ
5796 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5797 
5798 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5799 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5800 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5801 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5802 #else
5803 #endif
5804 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5805 {
5806   Mat            mat  = *mmat;
5807   PetscInt       m    = *mm, n = *mn;
5808   InsertMode     addv = *maddv;
5809   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5810   PetscScalar    value;
5811   PetscErrorCode ierr;
5812 
5813   MatCheckPreallocated(mat,1);
5814   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5815 
5816 #if defined(PETSC_USE_DEBUG)
5817   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5818 #endif
5819   {
5820     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5821     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5822     PetscBool roworiented = aij->roworiented;
5823 
5824     /* Some Variables required in the macro */
5825     Mat        A                 = aij->A;
5826     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5827     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5828     MatScalar  *aa               = a->a;
5829     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5830     Mat        B                 = aij->B;
5831     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5832     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5833     MatScalar  *ba               = b->a;
5834 
5835     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5836     PetscInt  nonew = a->nonew;
5837     MatScalar *ap1,*ap2;
5838 
5839     PetscFunctionBegin;
5840     for (i=0; i<m; i++) {
5841       if (im[i] < 0) continue;
5842 #if defined(PETSC_USE_DEBUG)
5843       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5844 #endif
5845       if (im[i] >= rstart && im[i] < rend) {
5846         row      = im[i] - rstart;
5847         lastcol1 = -1;
5848         rp1      = aj + ai[row];
5849         ap1      = aa + ai[row];
5850         rmax1    = aimax[row];
5851         nrow1    = ailen[row];
5852         low1     = 0;
5853         high1    = nrow1;
5854         lastcol2 = -1;
5855         rp2      = bj + bi[row];
5856         ap2      = ba + bi[row];
5857         rmax2    = bimax[row];
5858         nrow2    = bilen[row];
5859         low2     = 0;
5860         high2    = nrow2;
5861 
5862         for (j=0; j<n; j++) {
5863           if (roworiented) value = v[i*n+j];
5864           else value = v[i+j*m];
5865           if (in[j] >= cstart && in[j] < cend) {
5866             col = in[j] - cstart;
5867             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5868             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5869           } else if (in[j] < 0) continue;
5870 #if defined(PETSC_USE_DEBUG)
5871           /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
5872           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5873 #endif
5874           else {
5875             if (mat->was_assembled) {
5876               if (!aij->colmap) {
5877                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5878               }
5879 #if defined(PETSC_USE_CTABLE)
5880               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5881               col--;
5882 #else
5883               col = aij->colmap[in[j]] - 1;
5884 #endif
5885               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5886               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5887                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5888                 col  =  in[j];
5889                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5890                 B     = aij->B;
5891                 b     = (Mat_SeqAIJ*)B->data;
5892                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5893                 rp2   = bj + bi[row];
5894                 ap2   = ba + bi[row];
5895                 rmax2 = bimax[row];
5896                 nrow2 = bilen[row];
5897                 low2  = 0;
5898                 high2 = nrow2;
5899                 bm    = aij->B->rmap->n;
5900                 ba    = b->a;
5901               }
5902             } else col = in[j];
5903             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5904           }
5905         }
5906       } else if (!aij->donotstash) {
5907         if (roworiented) {
5908           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5909         } else {
5910           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5911         }
5912       }
5913     }
5914   }
5915   PetscFunctionReturnVoid();
5916 }
5917