xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 23bebc0b675bd8a09d1b3d46b613bd16b6fd18be)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/isimpl.h>
6 #include <petscblaslapack.h>
7 #include <petscsf.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when
23    enough exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46 
47 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
48 {
49   PetscErrorCode ierr;
50   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
51 
52   PetscFunctionBegin;
53   if (mat->A) {
54     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
55     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
56   }
57   PetscFunctionReturn(0);
58 }
59 
60 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
61 {
62   PetscErrorCode  ierr;
63   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
64   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
65   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
66   const PetscInt  *ia,*ib;
67   const MatScalar *aa,*bb;
68   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
69   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
70 
71   PetscFunctionBegin;
72   *keptrows = 0;
73   ia        = a->i;
74   ib        = b->i;
75   for (i=0; i<m; i++) {
76     na = ia[i+1] - ia[i];
77     nb = ib[i+1] - ib[i];
78     if (!na && !nb) {
79       cnt++;
80       goto ok1;
81     }
82     aa = a->a + ia[i];
83     for (j=0; j<na; j++) {
84       if (aa[j] != 0.0) goto ok1;
85     }
86     bb = b->a + ib[i];
87     for (j=0; j <nb; j++) {
88       if (bb[j] != 0.0) goto ok1;
89     }
90     cnt++;
91 ok1:;
92   }
93   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
94   if (!n0rows) PetscFunctionReturn(0);
95   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
96   cnt  = 0;
97   for (i=0; i<m; i++) {
98     na = ia[i+1] - ia[i];
99     nb = ib[i+1] - ib[i];
100     if (!na && !nb) continue;
101     aa = a->a + ia[i];
102     for (j=0; j<na;j++) {
103       if (aa[j] != 0.0) {
104         rows[cnt++] = rstart + i;
105         goto ok2;
106       }
107     }
108     bb = b->a + ib[i];
109     for (j=0; j<nb; j++) {
110       if (bb[j] != 0.0) {
111         rows[cnt++] = rstart + i;
112         goto ok2;
113       }
114     }
115 ok2:;
116   }
117   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
118   PetscFunctionReturn(0);
119 }
120 
121 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
122 {
123   PetscErrorCode    ierr;
124   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
125   PetscBool         cong;
126 
127   PetscFunctionBegin;
128   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
129   if (Y->assembled && cong) {
130     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
131   } else {
132     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
133   }
134   PetscFunctionReturn(0);
135 }
136 
137 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
138 {
139   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
140   PetscErrorCode ierr;
141   PetscInt       i,rstart,nrows,*rows;
142 
143   PetscFunctionBegin;
144   *zrows = NULL;
145   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
146   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
147   for (i=0; i<nrows; i++) rows[i] += rstart;
148   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
149   PetscFunctionReturn(0);
150 }
151 
152 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
153 {
154   PetscErrorCode ierr;
155   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
156   PetscInt       i,n,*garray = aij->garray;
157   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
158   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
159   PetscReal      *work;
160 
161   PetscFunctionBegin;
162   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
163   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
164   if (type == NORM_2) {
165     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
166       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
167     }
168     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
169       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
170     }
171   } else if (type == NORM_1) {
172     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
173       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
174     }
175     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
176       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
177     }
178   } else if (type == NORM_INFINITY) {
179     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
180       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
181     }
182     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
183       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
184     }
185 
186   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
187   if (type == NORM_INFINITY) {
188     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
189   } else {
190     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
191   }
192   ierr = PetscFree(work);CHKERRQ(ierr);
193   if (type == NORM_2) {
194     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
195   }
196   PetscFunctionReturn(0);
197 }
198 
199 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
200 {
201   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
202   IS              sis,gis;
203   PetscErrorCode  ierr;
204   const PetscInt  *isis,*igis;
205   PetscInt        n,*iis,nsis,ngis,rstart,i;
206 
207   PetscFunctionBegin;
208   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
209   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
210   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
211   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
212   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
213   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
214 
215   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
216   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
217   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
218   n    = ngis + nsis;
219   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
220   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
221   for (i=0; i<n; i++) iis[i] += rstart;
222   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
223 
224   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
225   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
226   ierr = ISDestroy(&sis);CHKERRQ(ierr);
227   ierr = ISDestroy(&gis);CHKERRQ(ierr);
228   PetscFunctionReturn(0);
229 }
230 
231 /*
232     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
233     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
234 
235     Only for square matrices
236 
237     Used by a preconditioner, hence PETSC_EXTERN
238 */
239 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
240 {
241   PetscMPIInt    rank,size;
242   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
243   PetscErrorCode ierr;
244   Mat            mat;
245   Mat_SeqAIJ     *gmata;
246   PetscMPIInt    tag;
247   MPI_Status     status;
248   PetscBool      aij;
249   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
250 
251   PetscFunctionBegin;
252   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
253   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
254   if (!rank) {
255     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
256     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
257   }
258   if (reuse == MAT_INITIAL_MATRIX) {
259     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
260     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
261     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
262     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
263     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
264     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
265     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
266     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
267     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
268 
269     rowners[0] = 0;
270     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
271     rstart = rowners[rank];
272     rend   = rowners[rank+1];
273     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
274     if (!rank) {
275       gmata = (Mat_SeqAIJ*) gmat->data;
276       /* send row lengths to all processors */
277       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
278       for (i=1; i<size; i++) {
279         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
280       }
281       /* determine number diagonal and off-diagonal counts */
282       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
283       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
284       jj   = 0;
285       for (i=0; i<m; i++) {
286         for (j=0; j<dlens[i]; j++) {
287           if (gmata->j[jj] < rstart) ld[i]++;
288           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
289           jj++;
290         }
291       }
292       /* send column indices to other processes */
293       for (i=1; i<size; i++) {
294         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
295         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
296         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
297       }
298 
299       /* send numerical values to other processes */
300       for (i=1; i<size; i++) {
301         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
302         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
303       }
304       gmataa = gmata->a;
305       gmataj = gmata->j;
306 
307     } else {
308       /* receive row lengths */
309       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
310       /* receive column indices */
311       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
312       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
313       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
314       /* determine number diagonal and off-diagonal counts */
315       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
316       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
317       jj   = 0;
318       for (i=0; i<m; i++) {
319         for (j=0; j<dlens[i]; j++) {
320           if (gmataj[jj] < rstart) ld[i]++;
321           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
322           jj++;
323         }
324       }
325       /* receive numerical values */
326       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
327       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
328     }
329     /* set preallocation */
330     for (i=0; i<m; i++) {
331       dlens[i] -= olens[i];
332     }
333     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
334     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
335 
336     for (i=0; i<m; i++) {
337       dlens[i] += olens[i];
338     }
339     cnt = 0;
340     for (i=0; i<m; i++) {
341       row  = rstart + i;
342       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
343       cnt += dlens[i];
344     }
345     if (rank) {
346       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
347     }
348     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
349     ierr = PetscFree(rowners);CHKERRQ(ierr);
350 
351     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
352 
353     *inmat = mat;
354   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
355     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
356     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
357     mat  = *inmat;
358     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
359     if (!rank) {
360       /* send numerical values to other processes */
361       gmata  = (Mat_SeqAIJ*) gmat->data;
362       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
363       gmataa = gmata->a;
364       for (i=1; i<size; i++) {
365         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
366         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
367       }
368       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
369     } else {
370       /* receive numerical values from process 0*/
371       nz   = Ad->nz + Ao->nz;
372       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
373       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
374     }
375     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
376     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
377     ad = Ad->a;
378     ao = Ao->a;
379     if (mat->rmap->n) {
380       i  = 0;
381       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
382       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
383     }
384     for (i=1; i<mat->rmap->n; i++) {
385       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
386       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
387     }
388     i--;
389     if (mat->rmap->n) {
390       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
391     }
392     if (rank) {
393       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
394     }
395   }
396   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
397   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
398   PetscFunctionReturn(0);
399 }
400 
401 /*
402   Local utility routine that creates a mapping from the global column
403 number to the local number in the off-diagonal part of the local
404 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
405 a slightly higher hash table cost; without it it is not scalable (each processor
406 has an order N integer array but is fast to acess.
407 */
408 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
409 {
410   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
411   PetscErrorCode ierr;
412   PetscInt       n = aij->B->cmap->n,i;
413 
414   PetscFunctionBegin;
415   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
416 #if defined(PETSC_USE_CTABLE)
417   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
418   for (i=0; i<n; i++) {
419     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
420   }
421 #else
422   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
423   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
424   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
425 #endif
426   PetscFunctionReturn(0);
427 }
428 
429 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
430 { \
431     if (col <= lastcol1)  low1 = 0;     \
432     else                 high1 = nrow1; \
433     lastcol1 = col;\
434     while (high1-low1 > 5) { \
435       t = (low1+high1)/2; \
436       if (rp1[t] > col) high1 = t; \
437       else              low1  = t; \
438     } \
439       for (_i=low1; _i<high1; _i++) { \
440         if (rp1[_i] > col) break; \
441         if (rp1[_i] == col) { \
442           if (addv == ADD_VALUES) ap1[_i] += value;   \
443           else                    ap1[_i] = value; \
444           goto a_noinsert; \
445         } \
446       }  \
447       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
448       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
449       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
450       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
451       N = nrow1++ - 1; a->nz++; high1++; \
452       /* shift up all the later entries in this row */ \
453       for (ii=N; ii>=_i; ii--) { \
454         rp1[ii+1] = rp1[ii]; \
455         ap1[ii+1] = ap1[ii]; \
456       } \
457       rp1[_i] = col;  \
458       ap1[_i] = value;  \
459       A->nonzerostate++;\
460       a_noinsert: ; \
461       ailen[row] = nrow1; \
462 }
463 
464 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
465   { \
466     if (col <= lastcol2) low2 = 0;                        \
467     else high2 = nrow2;                                   \
468     lastcol2 = col;                                       \
469     while (high2-low2 > 5) {                              \
470       t = (low2+high2)/2;                                 \
471       if (rp2[t] > col) high2 = t;                        \
472       else             low2  = t;                         \
473     }                                                     \
474     for (_i=low2; _i<high2; _i++) {                       \
475       if (rp2[_i] > col) break;                           \
476       if (rp2[_i] == col) {                               \
477         if (addv == ADD_VALUES) ap2[_i] += value;         \
478         else                    ap2[_i] = value;          \
479         goto b_noinsert;                                  \
480       }                                                   \
481     }                                                     \
482     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
483     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
484     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
485     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
486     N = nrow2++ - 1; b->nz++; high2++;                    \
487     /* shift up all the later entries in this row */      \
488     for (ii=N; ii>=_i; ii--) {                            \
489       rp2[ii+1] = rp2[ii];                                \
490       ap2[ii+1] = ap2[ii];                                \
491     }                                                     \
492     rp2[_i] = col;                                        \
493     ap2[_i] = value;                                      \
494     B->nonzerostate++;                                    \
495     b_noinsert: ;                                         \
496     bilen[row] = nrow2;                                   \
497   }
498 
499 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
500 {
501   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
502   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
503   PetscErrorCode ierr;
504   PetscInt       l,*garray = mat->garray,diag;
505 
506   PetscFunctionBegin;
507   /* code only works for square matrices A */
508 
509   /* find size of row to the left of the diagonal part */
510   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
511   row  = row - diag;
512   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
513     if (garray[b->j[b->i[row]+l]] > diag) break;
514   }
515   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
516 
517   /* diagonal part */
518   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
519 
520   /* right of diagonal part */
521   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
522   PetscFunctionReturn(0);
523 }
524 
525 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
526 {
527   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
528   PetscScalar    value;
529   PetscErrorCode ierr;
530   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
531   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
532   PetscBool      roworiented = aij->roworiented;
533 
534   /* Some Variables required in the macro */
535   Mat        A                 = aij->A;
536   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
537   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
538   MatScalar  *aa               = a->a;
539   PetscBool  ignorezeroentries = a->ignorezeroentries;
540   Mat        B                 = aij->B;
541   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
542   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
543   MatScalar  *ba               = b->a;
544 
545   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
546   PetscInt  nonew;
547   MatScalar *ap1,*ap2;
548 
549   PetscFunctionBegin;
550   for (i=0; i<m; i++) {
551     if (im[i] < 0) continue;
552 #if defined(PETSC_USE_DEBUG)
553     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
554 #endif
555     if (im[i] >= rstart && im[i] < rend) {
556       row      = im[i] - rstart;
557       lastcol1 = -1;
558       rp1      = aj + ai[row];
559       ap1      = aa + ai[row];
560       rmax1    = aimax[row];
561       nrow1    = ailen[row];
562       low1     = 0;
563       high1    = nrow1;
564       lastcol2 = -1;
565       rp2      = bj + bi[row];
566       ap2      = ba + bi[row];
567       rmax2    = bimax[row];
568       nrow2    = bilen[row];
569       low2     = 0;
570       high2    = nrow2;
571 
572       for (j=0; j<n; j++) {
573         if (roworiented) value = v[i*n+j];
574         else             value = v[i+j*m];
575         if (in[j] >= cstart && in[j] < cend) {
576           col   = in[j] - cstart;
577           nonew = a->nonew;
578           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
579           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
580         } else if (in[j] < 0) continue;
581 #if defined(PETSC_USE_DEBUG)
582         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
583 #endif
584         else {
585           if (mat->was_assembled) {
586             if (!aij->colmap) {
587               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
588             }
589 #if defined(PETSC_USE_CTABLE)
590             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
591             col--;
592 #else
593             col = aij->colmap[in[j]] - 1;
594 #endif
595             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
596               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
597               col  =  in[j];
598               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
599               B     = aij->B;
600               b     = (Mat_SeqAIJ*)B->data;
601               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
602               rp2   = bj + bi[row];
603               ap2   = ba + bi[row];
604               rmax2 = bimax[row];
605               nrow2 = bilen[row];
606               low2  = 0;
607               high2 = nrow2;
608               bm    = aij->B->rmap->n;
609               ba    = b->a;
610             } else if (col < 0) {
611               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
612                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
613               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
614             }
615           } else col = in[j];
616           nonew = b->nonew;
617           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
618         }
619       }
620     } else {
621       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
622       if (!aij->donotstash) {
623         mat->assembled = PETSC_FALSE;
624         if (roworiented) {
625           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
626         } else {
627           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
628         }
629       }
630     }
631   }
632   PetscFunctionReturn(0);
633 }
634 
635 /*
636     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
637     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
638     No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
639 */
640 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
641 {
642   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
643   Mat            A           = aij->A; /* diagonal part of the matrix */
644   Mat            B           = aij->B; /* offdiagonal part of the matrix */
645   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
646   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
647   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
648   PetscInt       *ailen      = a->ilen,*aj = a->j;
649   PetscInt       *bilen      = b->ilen,*bj = b->j;
650   PetscInt       am          = aij->A->rmap->n,j;
651   PetscInt       diag_so_far = 0,dnz;
652   PetscInt       offd_so_far = 0,onz;
653 
654   PetscFunctionBegin;
655   /* Iterate over all rows of the matrix */
656   for (j=0; j<am; j++) {
657     dnz = onz = 0;
658     /*  Iterate over all non-zero columns of the current row */
659     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
660       /* If column is in the diagonal */
661       if (mat_j[col] >= cstart && mat_j[col] < cend) {
662         aj[diag_so_far++] = mat_j[col] - cstart;
663         dnz++;
664       } else { /* off-diagonal entries */
665         bj[offd_so_far++] = mat_j[col];
666         onz++;
667       }
668     }
669     ailen[j] = dnz;
670     bilen[j] = onz;
671   }
672   PetscFunctionReturn(0);
673 }
674 
675 /*
676     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
677     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
678     No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
679     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
680     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
681 */
682 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
683 {
684   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
685   Mat            A      = aij->A; /* diagonal part of the matrix */
686   Mat            B      = aij->B; /* offdiagonal part of the matrix */
687   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
688   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
689   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
690   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
691   PetscInt       *ailen = a->ilen,*aj = a->j;
692   PetscInt       *bilen = b->ilen,*bj = b->j;
693   PetscInt       am     = aij->A->rmap->n,j;
694   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
695   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
696   PetscScalar    *aa = a->a,*ba = b->a;
697 
698   PetscFunctionBegin;
699   /* Iterate over all rows of the matrix */
700   for (j=0; j<am; j++) {
701     dnz_row = onz_row = 0;
702     rowstart_offd = full_offd_i[j];
703     rowstart_diag = full_diag_i[j];
704     /*  Iterate over all non-zero columns of the current row */
705     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
706       /* If column is in the diagonal */
707       if (mat_j[col] >= cstart && mat_j[col] < cend) {
708         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
709         aa[rowstart_diag+dnz_row] = mat_a[col];
710         dnz_row++;
711       } else { /* off-diagonal entries */
712         bj[rowstart_offd+onz_row] = mat_j[col];
713         ba[rowstart_offd+onz_row] = mat_a[col];
714         onz_row++;
715       }
716     }
717     ailen[j] = dnz_row;
718     bilen[j] = onz_row;
719   }
720   PetscFunctionReturn(0);
721 }
722 
723 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
724 {
725   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
726   PetscErrorCode ierr;
727   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
728   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
729 
730   PetscFunctionBegin;
731   for (i=0; i<m; i++) {
732     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
733     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
734     if (idxm[i] >= rstart && idxm[i] < rend) {
735       row = idxm[i] - rstart;
736       for (j=0; j<n; j++) {
737         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
738         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
739         if (idxn[j] >= cstart && idxn[j] < cend) {
740           col  = idxn[j] - cstart;
741           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
742         } else {
743           if (!aij->colmap) {
744             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
745           }
746 #if defined(PETSC_USE_CTABLE)
747           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
748           col--;
749 #else
750           col = aij->colmap[idxn[j]] - 1;
751 #endif
752           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
753           else {
754             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
755           }
756         }
757       }
758     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
759   }
760   PetscFunctionReturn(0);
761 }
762 
763 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
764 
765 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
766 {
767   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
768   PetscErrorCode ierr;
769   PetscInt       nstash,reallocs;
770 
771   PetscFunctionBegin;
772   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
773 
774   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
775   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
776   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
777   PetscFunctionReturn(0);
778 }
779 
780 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
781 {
782   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
783   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
784   PetscErrorCode ierr;
785   PetscMPIInt    n;
786   PetscInt       i,j,rstart,ncols,flg;
787   PetscInt       *row,*col;
788   PetscBool      other_disassembled;
789   PetscScalar    *val;
790 
791   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
792 
793   PetscFunctionBegin;
794   if (!aij->donotstash && !mat->nooffprocentries) {
795     while (1) {
796       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
797       if (!flg) break;
798 
799       for (i=0; i<n; ) {
800         /* Now identify the consecutive vals belonging to the same row */
801         for (j=i,rstart=row[j]; j<n; j++) {
802           if (row[j] != rstart) break;
803         }
804         if (j < n) ncols = j-i;
805         else       ncols = n-i;
806         /* Now assemble all these values with a single function call */
807         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
808 
809         i = j;
810       }
811     }
812     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
813   }
814   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
815   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
816 
817   /* determine if any processor has disassembled, if so we must
818      also disassemble ourselfs, in order that we may reassemble. */
819   /*
820      if nonzero structure of submatrix B cannot change then we know that
821      no processor disassembled thus we can skip this stuff
822   */
823   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
824     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
825     if (mat->was_assembled && !other_disassembled) {
826       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
827     }
828   }
829   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
830     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
831   }
832   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
833   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
834   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
835 
836   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
837 
838   aij->rowvalues = 0;
839 
840   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
841   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
842 
843   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
844   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
845     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
846     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
847   }
848   PetscFunctionReturn(0);
849 }
850 
851 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
852 {
853   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
854   PetscErrorCode ierr;
855 
856   PetscFunctionBegin;
857   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
858   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
859   PetscFunctionReturn(0);
860 }
861 
862 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
863 {
864   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
865   PetscInt      *lrows;
866   PetscInt       r, len;
867   PetscBool      cong;
868   PetscErrorCode ierr;
869 
870   PetscFunctionBegin;
871   /* get locally owned rows */
872   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
873   /* fix right hand side if needed */
874   if (x && b) {
875     const PetscScalar *xx;
876     PetscScalar       *bb;
877 
878     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
879     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
880     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
881     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
882     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
883   }
884   /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/
885   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
886   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
887   if ((diag != 0.0) && cong) {
888     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
889   } else if (diag != 0.0) {
890     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
891     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
892     for (r = 0; r < len; ++r) {
893       const PetscInt row = lrows[r] + A->rmap->rstart;
894       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
895     }
896     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
897     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
898   } else {
899     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
900   }
901   ierr = PetscFree(lrows);CHKERRQ(ierr);
902 
903   /* only change matrix nonzero state if pattern was allowed to be changed */
904   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
905     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
906     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
907   }
908   PetscFunctionReturn(0);
909 }
910 
911 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
912 {
913   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
914   PetscErrorCode    ierr;
915   PetscMPIInt       n = A->rmap->n;
916   PetscInt          i,j,r,m,p = 0,len = 0;
917   PetscInt          *lrows,*owners = A->rmap->range;
918   PetscSFNode       *rrows;
919   PetscSF           sf;
920   const PetscScalar *xx;
921   PetscScalar       *bb,*mask;
922   Vec               xmask,lmask;
923   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
924   const PetscInt    *aj, *ii,*ridx;
925   PetscScalar       *aa;
926 
927   PetscFunctionBegin;
928   /* Create SF where leaves are input rows and roots are owned rows */
929   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
930   for (r = 0; r < n; ++r) lrows[r] = -1;
931   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
932   for (r = 0; r < N; ++r) {
933     const PetscInt idx   = rows[r];
934     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
935     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
936       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
937     }
938     rrows[r].rank  = p;
939     rrows[r].index = rows[r] - owners[p];
940   }
941   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
942   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
943   /* Collect flags for rows to be zeroed */
944   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
945   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
946   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
947   /* Compress and put in row numbers */
948   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
949   /* zero diagonal part of matrix */
950   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
951   /* handle off diagonal part of matrix */
952   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
953   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
954   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
955   for (i=0; i<len; i++) bb[lrows[i]] = 1;
956   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
957   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
958   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
959   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
960   if (x) {
961     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
962     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
963     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
964     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
965   }
966   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
967   /* remove zeroed rows of off diagonal matrix */
968   ii = aij->i;
969   for (i=0; i<len; i++) {
970     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
971   }
972   /* loop over all elements of off process part of matrix zeroing removed columns*/
973   if (aij->compressedrow.use) {
974     m    = aij->compressedrow.nrows;
975     ii   = aij->compressedrow.i;
976     ridx = aij->compressedrow.rindex;
977     for (i=0; i<m; i++) {
978       n  = ii[i+1] - ii[i];
979       aj = aij->j + ii[i];
980       aa = aij->a + ii[i];
981 
982       for (j=0; j<n; j++) {
983         if (PetscAbsScalar(mask[*aj])) {
984           if (b) bb[*ridx] -= *aa*xx[*aj];
985           *aa = 0.0;
986         }
987         aa++;
988         aj++;
989       }
990       ridx++;
991     }
992   } else { /* do not use compressed row format */
993     m = l->B->rmap->n;
994     for (i=0; i<m; i++) {
995       n  = ii[i+1] - ii[i];
996       aj = aij->j + ii[i];
997       aa = aij->a + ii[i];
998       for (j=0; j<n; j++) {
999         if (PetscAbsScalar(mask[*aj])) {
1000           if (b) bb[i] -= *aa*xx[*aj];
1001           *aa = 0.0;
1002         }
1003         aa++;
1004         aj++;
1005       }
1006     }
1007   }
1008   if (x) {
1009     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1010     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1011   }
1012   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1013   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1014   ierr = PetscFree(lrows);CHKERRQ(ierr);
1015 
1016   /* only change matrix nonzero state if pattern was allowed to be changed */
1017   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1018     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1019     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1020   }
1021   PetscFunctionReturn(0);
1022 }
1023 
1024 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1025 {
1026   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1027   PetscErrorCode ierr;
1028   PetscInt       nt;
1029   VecScatter     Mvctx = a->Mvctx;
1030 
1031   PetscFunctionBegin;
1032   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1033   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1034 
1035   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1036   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1037   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1038   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1039   PetscFunctionReturn(0);
1040 }
1041 
1042 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1043 {
1044   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1045   PetscErrorCode ierr;
1046 
1047   PetscFunctionBegin;
1048   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1049   PetscFunctionReturn(0);
1050 }
1051 
1052 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1053 {
1054   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1055   PetscErrorCode ierr;
1056   VecScatter     Mvctx = a->Mvctx;
1057 
1058   PetscFunctionBegin;
1059   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1060   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1061   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1062   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1063   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1064   PetscFunctionReturn(0);
1065 }
1066 
1067 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1068 {
1069   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1070   PetscErrorCode ierr;
1071   PetscBool      merged;
1072 
1073   PetscFunctionBegin;
1074   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1075   /* do nondiagonal part */
1076   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1077   if (!merged) {
1078     /* send it on its way */
1079     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1080     /* do local part */
1081     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1082     /* receive remote parts: note this assumes the values are not actually */
1083     /* added in yy until the next line, */
1084     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1085   } else {
1086     /* do local part */
1087     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1088     /* send it on its way */
1089     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1090     /* values actually were received in the Begin() but we need to call this nop */
1091     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1092   }
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1097 {
1098   MPI_Comm       comm;
1099   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1100   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1101   IS             Me,Notme;
1102   PetscErrorCode ierr;
1103   PetscInt       M,N,first,last,*notme,i;
1104   PetscMPIInt    size;
1105 
1106   PetscFunctionBegin;
1107   /* Easy test: symmetric diagonal block */
1108   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1109   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1110   if (!*f) PetscFunctionReturn(0);
1111   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1112   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1113   if (size == 1) PetscFunctionReturn(0);
1114 
1115   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1116   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1117   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1118   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1119   for (i=0; i<first; i++) notme[i] = i;
1120   for (i=last; i<M; i++) notme[i-last+first] = i;
1121   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1122   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1123   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1124   Aoff = Aoffs[0];
1125   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1126   Boff = Boffs[0];
1127   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1128   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1129   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1130   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1131   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1132   ierr = PetscFree(notme);CHKERRQ(ierr);
1133   PetscFunctionReturn(0);
1134 }
1135 
1136 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1137 {
1138   PetscErrorCode ierr;
1139 
1140   PetscFunctionBegin;
1141   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1146 {
1147   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1148   PetscErrorCode ierr;
1149 
1150   PetscFunctionBegin;
1151   /* do nondiagonal part */
1152   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1153   /* send it on its way */
1154   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1155   /* do local part */
1156   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1157   /* receive remote parts */
1158   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1159   PetscFunctionReturn(0);
1160 }
1161 
1162 /*
1163   This only works correctly for square matrices where the subblock A->A is the
1164    diagonal block
1165 */
1166 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1167 {
1168   PetscErrorCode ierr;
1169   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1170 
1171   PetscFunctionBegin;
1172   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1173   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1174   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1175   PetscFunctionReturn(0);
1176 }
1177 
1178 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1179 {
1180   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1181   PetscErrorCode ierr;
1182 
1183   PetscFunctionBegin;
1184   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1185   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1186   PetscFunctionReturn(0);
1187 }
1188 
1189 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1190 {
1191   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1192   PetscErrorCode ierr;
1193 
1194   PetscFunctionBegin;
1195 #if defined(PETSC_USE_LOG)
1196   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1197 #endif
1198   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1199   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1200   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1201   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1202 #if defined(PETSC_USE_CTABLE)
1203   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1204 #else
1205   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1206 #endif
1207   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1208   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1209   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1210   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1211   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1212   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1213   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1214 
1215   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1216   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1217   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1218   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1219   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1220   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1221   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1222   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1223   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1224 #if defined(PETSC_HAVE_ELEMENTAL)
1225   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1226 #endif
1227 #if defined(PETSC_HAVE_HYPRE)
1228   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1229   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1230 #endif
1231   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1232   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
1233   PetscFunctionReturn(0);
1234 }
1235 
1236 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1237 {
1238   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1239   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1240   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1241   PetscErrorCode ierr;
1242   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1243   int            fd;
1244   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1245   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1246   PetscScalar    *column_values;
1247   PetscInt       message_count,flowcontrolcount;
1248   FILE           *file;
1249 
1250   PetscFunctionBegin;
1251   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1252   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1253   nz   = A->nz + B->nz;
1254   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1255   if (!rank) {
1256     header[0] = MAT_FILE_CLASSID;
1257     header[1] = mat->rmap->N;
1258     header[2] = mat->cmap->N;
1259 
1260     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1261     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1262     /* get largest number of rows any processor has */
1263     rlen  = mat->rmap->n;
1264     range = mat->rmap->range;
1265     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1266   } else {
1267     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1268     rlen = mat->rmap->n;
1269   }
1270 
1271   /* load up the local row counts */
1272   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1273   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1274 
1275   /* store the row lengths to the file */
1276   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1277   if (!rank) {
1278     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1279     for (i=1; i<size; i++) {
1280       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1281       rlen = range[i+1] - range[i];
1282       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1283       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1284     }
1285     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1286   } else {
1287     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1288     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1289     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1290   }
1291   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1292 
1293   /* load up the local column indices */
1294   nzmax = nz; /* th processor needs space a largest processor needs */
1295   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1296   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1297   cnt   = 0;
1298   for (i=0; i<mat->rmap->n; i++) {
1299     for (j=B->i[i]; j<B->i[i+1]; j++) {
1300       if ((col = garray[B->j[j]]) > cstart) break;
1301       column_indices[cnt++] = col;
1302     }
1303     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1304     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1305   }
1306   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1307 
1308   /* store the column indices to the file */
1309   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1310   if (!rank) {
1311     MPI_Status status;
1312     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1313     for (i=1; i<size; i++) {
1314       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1315       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1316       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1317       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1318       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1319     }
1320     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1321   } else {
1322     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1323     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1324     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1325     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1326   }
1327   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1328 
1329   /* load up the local column values */
1330   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1331   cnt  = 0;
1332   for (i=0; i<mat->rmap->n; i++) {
1333     for (j=B->i[i]; j<B->i[i+1]; j++) {
1334       if (garray[B->j[j]] > cstart) break;
1335       column_values[cnt++] = B->a[j];
1336     }
1337     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1338     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1339   }
1340   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1341 
1342   /* store the column values to the file */
1343   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1344   if (!rank) {
1345     MPI_Status status;
1346     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1347     for (i=1; i<size; i++) {
1348       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1349       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1350       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1351       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1352       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1353     }
1354     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1355   } else {
1356     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1357     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1358     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1359     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1360   }
1361   ierr = PetscFree(column_values);CHKERRQ(ierr);
1362 
1363   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1364   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1365   PetscFunctionReturn(0);
1366 }
1367 
1368 #include <petscdraw.h>
1369 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1370 {
1371   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1372   PetscErrorCode    ierr;
1373   PetscMPIInt       rank = aij->rank,size = aij->size;
1374   PetscBool         isdraw,iascii,isbinary;
1375   PetscViewer       sviewer;
1376   PetscViewerFormat format;
1377 
1378   PetscFunctionBegin;
1379   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1380   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1381   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1382   if (iascii) {
1383     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1384     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1385       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1386       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1387       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1388       for (i=0; i<(PetscInt)size; i++) {
1389         nmax = PetscMax(nmax,nz[i]);
1390         nmin = PetscMin(nmin,nz[i]);
1391         navg += nz[i];
1392       }
1393       ierr = PetscFree(nz);CHKERRQ(ierr);
1394       navg = navg/size;
1395       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1396       PetscFunctionReturn(0);
1397     }
1398     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1399     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1400       MatInfo   info;
1401       PetscBool inodes;
1402 
1403       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1404       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1405       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1406       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1407       if (!inodes) {
1408         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1409                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1410       } else {
1411         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1412                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1413       }
1414       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1415       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1416       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1417       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1418       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1419       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1420       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1421       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1422       PetscFunctionReturn(0);
1423     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1424       PetscInt inodecount,inodelimit,*inodes;
1425       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1426       if (inodes) {
1427         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1428       } else {
1429         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1430       }
1431       PetscFunctionReturn(0);
1432     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1433       PetscFunctionReturn(0);
1434     }
1435   } else if (isbinary) {
1436     if (size == 1) {
1437       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1438       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1439     } else {
1440       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1441     }
1442     PetscFunctionReturn(0);
1443   } else if (isdraw) {
1444     PetscDraw draw;
1445     PetscBool isnull;
1446     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1447     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1448     if (isnull) PetscFunctionReturn(0);
1449   }
1450 
1451   {
1452     /* assemble the entire matrix onto first processor. */
1453     Mat        A;
1454     Mat_SeqAIJ *Aloc;
1455     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1456     MatScalar  *a;
1457 
1458     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1459     if (!rank) {
1460       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1461     } else {
1462       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1463     }
1464     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1465     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1466     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1467     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1468     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1469 
1470     /* copy over the A part */
1471     Aloc = (Mat_SeqAIJ*)aij->A->data;
1472     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1473     row  = mat->rmap->rstart;
1474     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1475     for (i=0; i<m; i++) {
1476       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1477       row++;
1478       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1479     }
1480     aj = Aloc->j;
1481     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1482 
1483     /* copy over the B part */
1484     Aloc = (Mat_SeqAIJ*)aij->B->data;
1485     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1486     row  = mat->rmap->rstart;
1487     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1488     ct   = cols;
1489     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1490     for (i=0; i<m; i++) {
1491       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1492       row++;
1493       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1494     }
1495     ierr = PetscFree(ct);CHKERRQ(ierr);
1496     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1497     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1498     /*
1499        Everyone has to call to draw the matrix since the graphics waits are
1500        synchronized across all processors that share the PetscDraw object
1501     */
1502     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1503     if (!rank) {
1504       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1505       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1506     }
1507     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1508     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1509     ierr = MatDestroy(&A);CHKERRQ(ierr);
1510   }
1511   PetscFunctionReturn(0);
1512 }
1513 
1514 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1515 {
1516   PetscErrorCode ierr;
1517   PetscBool      iascii,isdraw,issocket,isbinary;
1518 
1519   PetscFunctionBegin;
1520   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1521   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1522   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1523   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1524   if (iascii || isdraw || isbinary || issocket) {
1525     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1526   }
1527   PetscFunctionReturn(0);
1528 }
1529 
1530 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1531 {
1532   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1533   PetscErrorCode ierr;
1534   Vec            bb1 = 0;
1535   PetscBool      hasop;
1536 
1537   PetscFunctionBegin;
1538   if (flag == SOR_APPLY_UPPER) {
1539     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1540     PetscFunctionReturn(0);
1541   }
1542 
1543   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1544     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1545   }
1546 
1547   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1548     if (flag & SOR_ZERO_INITIAL_GUESS) {
1549       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1550       its--;
1551     }
1552 
1553     while (its--) {
1554       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1555       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1556 
1557       /* update rhs: bb1 = bb - B*x */
1558       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1559       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1560 
1561       /* local sweep */
1562       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1563     }
1564   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1565     if (flag & SOR_ZERO_INITIAL_GUESS) {
1566       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1567       its--;
1568     }
1569     while (its--) {
1570       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1571       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1572 
1573       /* update rhs: bb1 = bb - B*x */
1574       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1575       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1576 
1577       /* local sweep */
1578       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1579     }
1580   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1581     if (flag & SOR_ZERO_INITIAL_GUESS) {
1582       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1583       its--;
1584     }
1585     while (its--) {
1586       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1587       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1588 
1589       /* update rhs: bb1 = bb - B*x */
1590       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1591       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1592 
1593       /* local sweep */
1594       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1595     }
1596   } else if (flag & SOR_EISENSTAT) {
1597     Vec xx1;
1598 
1599     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1600     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1601 
1602     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1603     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1604     if (!mat->diag) {
1605       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1606       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1607     }
1608     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1609     if (hasop) {
1610       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1611     } else {
1612       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1613     }
1614     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1615 
1616     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1617 
1618     /* local sweep */
1619     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1620     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1621     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1622   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1623 
1624   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1625 
1626   matin->factorerrortype = mat->A->factorerrortype;
1627   PetscFunctionReturn(0);
1628 }
1629 
1630 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1631 {
1632   Mat            aA,aB,Aperm;
1633   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1634   PetscScalar    *aa,*ba;
1635   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1636   PetscSF        rowsf,sf;
1637   IS             parcolp = NULL;
1638   PetscBool      done;
1639   PetscErrorCode ierr;
1640 
1641   PetscFunctionBegin;
1642   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1643   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1644   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1645   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1646 
1647   /* Invert row permutation to find out where my rows should go */
1648   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1649   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1650   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1651   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1652   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1653   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1654 
1655   /* Invert column permutation to find out where my columns should go */
1656   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1657   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1658   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1659   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1660   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1661   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1662   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1663 
1664   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1665   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1666   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1667 
1668   /* Find out where my gcols should go */
1669   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1670   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1671   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1672   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1673   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1674   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1675   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1676   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1677 
1678   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1679   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1680   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1681   for (i=0; i<m; i++) {
1682     PetscInt row = rdest[i],rowner;
1683     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1684     for (j=ai[i]; j<ai[i+1]; j++) {
1685       PetscInt cowner,col = cdest[aj[j]];
1686       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1687       if (rowner == cowner) dnnz[i]++;
1688       else onnz[i]++;
1689     }
1690     for (j=bi[i]; j<bi[i+1]; j++) {
1691       PetscInt cowner,col = gcdest[bj[j]];
1692       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1693       if (rowner == cowner) dnnz[i]++;
1694       else onnz[i]++;
1695     }
1696   }
1697   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1698   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1699   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1700   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1701   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1702 
1703   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1704   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1705   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1706   for (i=0; i<m; i++) {
1707     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1708     PetscInt j0,rowlen;
1709     rowlen = ai[i+1] - ai[i];
1710     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1711       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1712       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1713     }
1714     rowlen = bi[i+1] - bi[i];
1715     for (j0=j=0; j<rowlen; j0=j) {
1716       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1717       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1718     }
1719   }
1720   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1721   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1722   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1723   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1724   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1725   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1726   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1727   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1728   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1729   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1730   *B = Aperm;
1731   PetscFunctionReturn(0);
1732 }
1733 
1734 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1735 {
1736   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1737   PetscErrorCode ierr;
1738 
1739   PetscFunctionBegin;
1740   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1741   if (ghosts) *ghosts = aij->garray;
1742   PetscFunctionReturn(0);
1743 }
1744 
1745 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1746 {
1747   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1748   Mat            A    = mat->A,B = mat->B;
1749   PetscErrorCode ierr;
1750   PetscReal      isend[5],irecv[5];
1751 
1752   PetscFunctionBegin;
1753   info->block_size = 1.0;
1754   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1755 
1756   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1757   isend[3] = info->memory;  isend[4] = info->mallocs;
1758 
1759   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1760 
1761   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1762   isend[3] += info->memory;  isend[4] += info->mallocs;
1763   if (flag == MAT_LOCAL) {
1764     info->nz_used      = isend[0];
1765     info->nz_allocated = isend[1];
1766     info->nz_unneeded  = isend[2];
1767     info->memory       = isend[3];
1768     info->mallocs      = isend[4];
1769   } else if (flag == MAT_GLOBAL_MAX) {
1770     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1771 
1772     info->nz_used      = irecv[0];
1773     info->nz_allocated = irecv[1];
1774     info->nz_unneeded  = irecv[2];
1775     info->memory       = irecv[3];
1776     info->mallocs      = irecv[4];
1777   } else if (flag == MAT_GLOBAL_SUM) {
1778     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1779 
1780     info->nz_used      = irecv[0];
1781     info->nz_allocated = irecv[1];
1782     info->nz_unneeded  = irecv[2];
1783     info->memory       = irecv[3];
1784     info->mallocs      = irecv[4];
1785   }
1786   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1787   info->fill_ratio_needed = 0;
1788   info->factor_mallocs    = 0;
1789   PetscFunctionReturn(0);
1790 }
1791 
1792 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1793 {
1794   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1795   PetscErrorCode ierr;
1796 
1797   PetscFunctionBegin;
1798   switch (op) {
1799   case MAT_NEW_NONZERO_LOCATIONS:
1800   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1801   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1802   case MAT_KEEP_NONZERO_PATTERN:
1803   case MAT_NEW_NONZERO_LOCATION_ERR:
1804   case MAT_USE_INODES:
1805   case MAT_IGNORE_ZERO_ENTRIES:
1806     MatCheckPreallocated(A,1);
1807     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1808     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1809     break;
1810   case MAT_ROW_ORIENTED:
1811     MatCheckPreallocated(A,1);
1812     a->roworiented = flg;
1813 
1814     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1815     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1816     break;
1817   case MAT_NEW_DIAGONALS:
1818     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1819     break;
1820   case MAT_IGNORE_OFF_PROC_ENTRIES:
1821     a->donotstash = flg;
1822     break;
1823   case MAT_SPD:
1824     A->spd_set = PETSC_TRUE;
1825     A->spd     = flg;
1826     if (flg) {
1827       A->symmetric                  = PETSC_TRUE;
1828       A->structurally_symmetric     = PETSC_TRUE;
1829       A->symmetric_set              = PETSC_TRUE;
1830       A->structurally_symmetric_set = PETSC_TRUE;
1831     }
1832     break;
1833   case MAT_SYMMETRIC:
1834     MatCheckPreallocated(A,1);
1835     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1836     break;
1837   case MAT_STRUCTURALLY_SYMMETRIC:
1838     MatCheckPreallocated(A,1);
1839     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1840     break;
1841   case MAT_HERMITIAN:
1842     MatCheckPreallocated(A,1);
1843     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1844     break;
1845   case MAT_SYMMETRY_ETERNAL:
1846     MatCheckPreallocated(A,1);
1847     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1848     break;
1849   case MAT_SUBMAT_SINGLEIS:
1850     A->submat_singleis = flg;
1851     break;
1852   case MAT_STRUCTURE_ONLY:
1853     /* The option is handled directly by MatSetOption() */
1854     break;
1855   default:
1856     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1857   }
1858   PetscFunctionReturn(0);
1859 }
1860 
1861 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1862 {
1863   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1864   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1865   PetscErrorCode ierr;
1866   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1867   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1868   PetscInt       *cmap,*idx_p;
1869 
1870   PetscFunctionBegin;
1871   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1872   mat->getrowactive = PETSC_TRUE;
1873 
1874   if (!mat->rowvalues && (idx || v)) {
1875     /*
1876         allocate enough space to hold information from the longest row.
1877     */
1878     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1879     PetscInt   max = 1,tmp;
1880     for (i=0; i<matin->rmap->n; i++) {
1881       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1882       if (max < tmp) max = tmp;
1883     }
1884     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1885   }
1886 
1887   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1888   lrow = row - rstart;
1889 
1890   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1891   if (!v)   {pvA = 0; pvB = 0;}
1892   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1893   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1894   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1895   nztot = nzA + nzB;
1896 
1897   cmap = mat->garray;
1898   if (v  || idx) {
1899     if (nztot) {
1900       /* Sort by increasing column numbers, assuming A and B already sorted */
1901       PetscInt imark = -1;
1902       if (v) {
1903         *v = v_p = mat->rowvalues;
1904         for (i=0; i<nzB; i++) {
1905           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1906           else break;
1907         }
1908         imark = i;
1909         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1910         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1911       }
1912       if (idx) {
1913         *idx = idx_p = mat->rowindices;
1914         if (imark > -1) {
1915           for (i=0; i<imark; i++) {
1916             idx_p[i] = cmap[cworkB[i]];
1917           }
1918         } else {
1919           for (i=0; i<nzB; i++) {
1920             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1921             else break;
1922           }
1923           imark = i;
1924         }
1925         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1926         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1927       }
1928     } else {
1929       if (idx) *idx = 0;
1930       if (v)   *v   = 0;
1931     }
1932   }
1933   *nz  = nztot;
1934   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1935   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1936   PetscFunctionReturn(0);
1937 }
1938 
1939 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1940 {
1941   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1942 
1943   PetscFunctionBegin;
1944   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1945   aij->getrowactive = PETSC_FALSE;
1946   PetscFunctionReturn(0);
1947 }
1948 
1949 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1950 {
1951   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1952   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1953   PetscErrorCode ierr;
1954   PetscInt       i,j,cstart = mat->cmap->rstart;
1955   PetscReal      sum = 0.0;
1956   MatScalar      *v;
1957 
1958   PetscFunctionBegin;
1959   if (aij->size == 1) {
1960     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1961   } else {
1962     if (type == NORM_FROBENIUS) {
1963       v = amat->a;
1964       for (i=0; i<amat->nz; i++) {
1965         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1966       }
1967       v = bmat->a;
1968       for (i=0; i<bmat->nz; i++) {
1969         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1970       }
1971       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1972       *norm = PetscSqrtReal(*norm);
1973       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1974     } else if (type == NORM_1) { /* max column norm */
1975       PetscReal *tmp,*tmp2;
1976       PetscInt  *jj,*garray = aij->garray;
1977       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1978       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1979       *norm = 0.0;
1980       v     = amat->a; jj = amat->j;
1981       for (j=0; j<amat->nz; j++) {
1982         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1983       }
1984       v = bmat->a; jj = bmat->j;
1985       for (j=0; j<bmat->nz; j++) {
1986         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1987       }
1988       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1989       for (j=0; j<mat->cmap->N; j++) {
1990         if (tmp2[j] > *norm) *norm = tmp2[j];
1991       }
1992       ierr = PetscFree(tmp);CHKERRQ(ierr);
1993       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1994       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1995     } else if (type == NORM_INFINITY) { /* max row norm */
1996       PetscReal ntemp = 0.0;
1997       for (j=0; j<aij->A->rmap->n; j++) {
1998         v   = amat->a + amat->i[j];
1999         sum = 0.0;
2000         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2001           sum += PetscAbsScalar(*v); v++;
2002         }
2003         v = bmat->a + bmat->i[j];
2004         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2005           sum += PetscAbsScalar(*v); v++;
2006         }
2007         if (sum > ntemp) ntemp = sum;
2008       }
2009       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2010       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
2011     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
2012   }
2013   PetscFunctionReturn(0);
2014 }
2015 
2016 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2017 {
2018   Mat_MPIAIJ     *a    =(Mat_MPIAIJ*)A->data,*b;
2019   Mat_SeqAIJ     *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
2020   PetscInt       M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,*B_diag_ilen,*B_diag_i,i,ncol,A_diag_ncol;
2021   PetscErrorCode ierr;
2022   Mat            B,A_diag,*B_diag;
2023   MatScalar      *array;
2024 
2025   PetscFunctionBegin;
2026   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2027   ai = Aloc->i; aj = Aloc->j;
2028   bi = Bloc->i; bj = Bloc->j;
2029   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2030     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2031     PetscSFNode          *oloc;
2032     PETSC_UNUSED PetscSF sf;
2033 
2034     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2035     /* compute d_nnz for preallocation */
2036     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2037     for (i=0; i<ai[ma]; i++) {
2038       d_nnz[aj[i]]++;
2039     }
2040     /* compute local off-diagonal contributions */
2041     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2042     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2043     /* map those to global */
2044     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2045     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2046     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2047     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2048     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2049     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2050     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2051 
2052     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2053     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2054     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2055     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2056     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2057     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2058   } else {
2059     B    = *matout;
2060     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2061   }
2062 
2063   b           = (Mat_MPIAIJ*)B->data;
2064   A_diag      = a->A;
2065   B_diag      = &b->A;
2066   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2067   A_diag_ncol = A_diag->cmap->N;
2068   B_diag_ilen = sub_B_diag->ilen;
2069   B_diag_i    = sub_B_diag->i;
2070 
2071   /* Set ilen for diagonal of B */
2072   for (i=0; i<A_diag_ncol; i++) {
2073     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2074   }
2075 
2076   /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
2077   very quickly (=without using MatSetValues), because all writes are local. */
2078   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2079 
2080   /* copy over the B part */
2081   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2082   array = Bloc->a;
2083   row   = A->rmap->rstart;
2084   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2085   cols_tmp = cols;
2086   for (i=0; i<mb; i++) {
2087     ncol = bi[i+1]-bi[i];
2088     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2089     row++;
2090     array += ncol; cols_tmp += ncol;
2091   }
2092   ierr = PetscFree(cols);CHKERRQ(ierr);
2093 
2094   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2095   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2096   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2097     *matout = B;
2098   } else {
2099     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2100   }
2101   PetscFunctionReturn(0);
2102 }
2103 
2104 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2105 {
2106   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2107   Mat            a    = aij->A,b = aij->B;
2108   PetscErrorCode ierr;
2109   PetscInt       s1,s2,s3;
2110 
2111   PetscFunctionBegin;
2112   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2113   if (rr) {
2114     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2115     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2116     /* Overlap communication with computation. */
2117     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2118   }
2119   if (ll) {
2120     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2121     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2122     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2123   }
2124   /* scale  the diagonal block */
2125   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2126 
2127   if (rr) {
2128     /* Do a scatter end and then right scale the off-diagonal block */
2129     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2130     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2131   }
2132   PetscFunctionReturn(0);
2133 }
2134 
2135 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2136 {
2137   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2138   PetscErrorCode ierr;
2139 
2140   PetscFunctionBegin;
2141   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2142   PetscFunctionReturn(0);
2143 }
2144 
2145 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2146 {
2147   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2148   Mat            a,b,c,d;
2149   PetscBool      flg;
2150   PetscErrorCode ierr;
2151 
2152   PetscFunctionBegin;
2153   a = matA->A; b = matA->B;
2154   c = matB->A; d = matB->B;
2155 
2156   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2157   if (flg) {
2158     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2159   }
2160   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2161   PetscFunctionReturn(0);
2162 }
2163 
2164 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2165 {
2166   PetscErrorCode ierr;
2167   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2168   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2169 
2170   PetscFunctionBegin;
2171   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2172   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2173     /* because of the column compression in the off-processor part of the matrix a->B,
2174        the number of columns in a->B and b->B may be different, hence we cannot call
2175        the MatCopy() directly on the two parts. If need be, we can provide a more
2176        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2177        then copying the submatrices */
2178     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2179   } else {
2180     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2181     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2182   }
2183   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2184   PetscFunctionReturn(0);
2185 }
2186 
2187 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2188 {
2189   PetscErrorCode ierr;
2190 
2191   PetscFunctionBegin;
2192   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2193   PetscFunctionReturn(0);
2194 }
2195 
2196 /*
2197    Computes the number of nonzeros per row needed for preallocation when X and Y
2198    have different nonzero structure.
2199 */
2200 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2201 {
2202   PetscInt       i,j,k,nzx,nzy;
2203 
2204   PetscFunctionBegin;
2205   /* Set the number of nonzeros in the new matrix */
2206   for (i=0; i<m; i++) {
2207     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2208     nzx = xi[i+1] - xi[i];
2209     nzy = yi[i+1] - yi[i];
2210     nnz[i] = 0;
2211     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2212       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2213       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2214       nnz[i]++;
2215     }
2216     for (; k<nzy; k++) nnz[i]++;
2217   }
2218   PetscFunctionReturn(0);
2219 }
2220 
2221 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2222 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2223 {
2224   PetscErrorCode ierr;
2225   PetscInt       m = Y->rmap->N;
2226   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2227   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2228 
2229   PetscFunctionBegin;
2230   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2231   PetscFunctionReturn(0);
2232 }
2233 
2234 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2235 {
2236   PetscErrorCode ierr;
2237   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2238   PetscBLASInt   bnz,one=1;
2239   Mat_SeqAIJ     *x,*y;
2240 
2241   PetscFunctionBegin;
2242   if (str == SAME_NONZERO_PATTERN) {
2243     PetscScalar alpha = a;
2244     x    = (Mat_SeqAIJ*)xx->A->data;
2245     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2246     y    = (Mat_SeqAIJ*)yy->A->data;
2247     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2248     x    = (Mat_SeqAIJ*)xx->B->data;
2249     y    = (Mat_SeqAIJ*)yy->B->data;
2250     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2251     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2252     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2253   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2254     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2255   } else {
2256     Mat      B;
2257     PetscInt *nnz_d,*nnz_o;
2258     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2259     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2260     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2261     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2262     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2263     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2264     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2265     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2266     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2267     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2268     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2269     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2270     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2271     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2272   }
2273   PetscFunctionReturn(0);
2274 }
2275 
2276 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2277 
2278 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2279 {
2280 #if defined(PETSC_USE_COMPLEX)
2281   PetscErrorCode ierr;
2282   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2283 
2284   PetscFunctionBegin;
2285   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2286   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2287 #else
2288   PetscFunctionBegin;
2289 #endif
2290   PetscFunctionReturn(0);
2291 }
2292 
2293 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2294 {
2295   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2296   PetscErrorCode ierr;
2297 
2298   PetscFunctionBegin;
2299   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2300   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2301   PetscFunctionReturn(0);
2302 }
2303 
2304 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2305 {
2306   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2307   PetscErrorCode ierr;
2308 
2309   PetscFunctionBegin;
2310   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2311   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2312   PetscFunctionReturn(0);
2313 }
2314 
2315 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2316 {
2317   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2318   PetscErrorCode ierr;
2319   PetscInt       i,*idxb = 0;
2320   PetscScalar    *va,*vb;
2321   Vec            vtmp;
2322 
2323   PetscFunctionBegin;
2324   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2325   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2326   if (idx) {
2327     for (i=0; i<A->rmap->n; i++) {
2328       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2329     }
2330   }
2331 
2332   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2333   if (idx) {
2334     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2335   }
2336   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2337   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2338 
2339   for (i=0; i<A->rmap->n; i++) {
2340     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2341       va[i] = vb[i];
2342       if (idx) idx[i] = a->garray[idxb[i]];
2343     }
2344   }
2345 
2346   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2347   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2348   ierr = PetscFree(idxb);CHKERRQ(ierr);
2349   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2350   PetscFunctionReturn(0);
2351 }
2352 
2353 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2354 {
2355   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2356   PetscErrorCode ierr;
2357   PetscInt       i,*idxb = 0;
2358   PetscScalar    *va,*vb;
2359   Vec            vtmp;
2360 
2361   PetscFunctionBegin;
2362   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2363   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2364   if (idx) {
2365     for (i=0; i<A->cmap->n; i++) {
2366       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2367     }
2368   }
2369 
2370   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2371   if (idx) {
2372     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2373   }
2374   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2375   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2376 
2377   for (i=0; i<A->rmap->n; i++) {
2378     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2379       va[i] = vb[i];
2380       if (idx) idx[i] = a->garray[idxb[i]];
2381     }
2382   }
2383 
2384   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2385   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2386   ierr = PetscFree(idxb);CHKERRQ(ierr);
2387   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2388   PetscFunctionReturn(0);
2389 }
2390 
2391 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2392 {
2393   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2394   PetscInt       n      = A->rmap->n;
2395   PetscInt       cstart = A->cmap->rstart;
2396   PetscInt       *cmap  = mat->garray;
2397   PetscInt       *diagIdx, *offdiagIdx;
2398   Vec            diagV, offdiagV;
2399   PetscScalar    *a, *diagA, *offdiagA;
2400   PetscInt       r;
2401   PetscErrorCode ierr;
2402 
2403   PetscFunctionBegin;
2404   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2405   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
2406   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
2407   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2408   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2409   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2410   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2411   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2412   for (r = 0; r < n; ++r) {
2413     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2414       a[r]   = diagA[r];
2415       idx[r] = cstart + diagIdx[r];
2416     } else {
2417       a[r]   = offdiagA[r];
2418       idx[r] = cmap[offdiagIdx[r]];
2419     }
2420   }
2421   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2422   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2423   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2424   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2425   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2426   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2427   PetscFunctionReturn(0);
2428 }
2429 
2430 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2431 {
2432   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2433   PetscInt       n      = A->rmap->n;
2434   PetscInt       cstart = A->cmap->rstart;
2435   PetscInt       *cmap  = mat->garray;
2436   PetscInt       *diagIdx, *offdiagIdx;
2437   Vec            diagV, offdiagV;
2438   PetscScalar    *a, *diagA, *offdiagA;
2439   PetscInt       r;
2440   PetscErrorCode ierr;
2441 
2442   PetscFunctionBegin;
2443   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2444   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2445   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2446   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2447   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2448   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2449   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2450   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2451   for (r = 0; r < n; ++r) {
2452     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2453       a[r]   = diagA[r];
2454       idx[r] = cstart + diagIdx[r];
2455     } else {
2456       a[r]   = offdiagA[r];
2457       idx[r] = cmap[offdiagIdx[r]];
2458     }
2459   }
2460   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2461   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2462   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2463   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2464   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2465   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2466   PetscFunctionReturn(0);
2467 }
2468 
2469 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2470 {
2471   PetscErrorCode ierr;
2472   Mat            *dummy;
2473 
2474   PetscFunctionBegin;
2475   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2476   *newmat = *dummy;
2477   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2478   PetscFunctionReturn(0);
2479 }
2480 
2481 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2482 {
2483   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2484   PetscErrorCode ierr;
2485 
2486   PetscFunctionBegin;
2487   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2488   A->factorerrortype = a->A->factorerrortype;
2489   PetscFunctionReturn(0);
2490 }
2491 
2492 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2493 {
2494   PetscErrorCode ierr;
2495   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2496 
2497   PetscFunctionBegin;
2498   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2499   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2500   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2501   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2502   PetscFunctionReturn(0);
2503 }
2504 
2505 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2506 {
2507   PetscFunctionBegin;
2508   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2509   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2510   PetscFunctionReturn(0);
2511 }
2512 
2513 /*@
2514    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2515 
2516    Collective on Mat
2517 
2518    Input Parameters:
2519 +    A - the matrix
2520 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2521 
2522  Level: advanced
2523 
2524 @*/
2525 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2526 {
2527   PetscErrorCode       ierr;
2528 
2529   PetscFunctionBegin;
2530   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2531   PetscFunctionReturn(0);
2532 }
2533 
2534 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2535 {
2536   PetscErrorCode       ierr;
2537   PetscBool            sc = PETSC_FALSE,flg;
2538 
2539   PetscFunctionBegin;
2540   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2541   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2542   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2543   if (flg) {
2544     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2545   }
2546   ierr = PetscOptionsTail();CHKERRQ(ierr);
2547   PetscFunctionReturn(0);
2548 }
2549 
2550 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2551 {
2552   PetscErrorCode ierr;
2553   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2554   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2555 
2556   PetscFunctionBegin;
2557   if (!Y->preallocated) {
2558     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2559   } else if (!aij->nz) {
2560     PetscInt nonew = aij->nonew;
2561     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2562     aij->nonew = nonew;
2563   }
2564   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2565   PetscFunctionReturn(0);
2566 }
2567 
2568 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2569 {
2570   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2571   PetscErrorCode ierr;
2572 
2573   PetscFunctionBegin;
2574   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2575   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2576   if (d) {
2577     PetscInt rstart;
2578     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2579     *d += rstart;
2580 
2581   }
2582   PetscFunctionReturn(0);
2583 }
2584 
2585 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2586 {
2587   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2588   PetscErrorCode ierr;
2589 
2590   PetscFunctionBegin;
2591   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2592   PetscFunctionReturn(0);
2593 }
2594 
2595 /* -------------------------------------------------------------------*/
2596 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2597                                        MatGetRow_MPIAIJ,
2598                                        MatRestoreRow_MPIAIJ,
2599                                        MatMult_MPIAIJ,
2600                                 /* 4*/ MatMultAdd_MPIAIJ,
2601                                        MatMultTranspose_MPIAIJ,
2602                                        MatMultTransposeAdd_MPIAIJ,
2603                                        0,
2604                                        0,
2605                                        0,
2606                                 /*10*/ 0,
2607                                        0,
2608                                        0,
2609                                        MatSOR_MPIAIJ,
2610                                        MatTranspose_MPIAIJ,
2611                                 /*15*/ MatGetInfo_MPIAIJ,
2612                                        MatEqual_MPIAIJ,
2613                                        MatGetDiagonal_MPIAIJ,
2614                                        MatDiagonalScale_MPIAIJ,
2615                                        MatNorm_MPIAIJ,
2616                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2617                                        MatAssemblyEnd_MPIAIJ,
2618                                        MatSetOption_MPIAIJ,
2619                                        MatZeroEntries_MPIAIJ,
2620                                 /*24*/ MatZeroRows_MPIAIJ,
2621                                        0,
2622                                        0,
2623                                        0,
2624                                        0,
2625                                 /*29*/ MatSetUp_MPIAIJ,
2626                                        0,
2627                                        0,
2628                                        MatGetDiagonalBlock_MPIAIJ,
2629                                        0,
2630                                 /*34*/ MatDuplicate_MPIAIJ,
2631                                        0,
2632                                        0,
2633                                        0,
2634                                        0,
2635                                 /*39*/ MatAXPY_MPIAIJ,
2636                                        MatCreateSubMatrices_MPIAIJ,
2637                                        MatIncreaseOverlap_MPIAIJ,
2638                                        MatGetValues_MPIAIJ,
2639                                        MatCopy_MPIAIJ,
2640                                 /*44*/ MatGetRowMax_MPIAIJ,
2641                                        MatScale_MPIAIJ,
2642                                        MatShift_MPIAIJ,
2643                                        MatDiagonalSet_MPIAIJ,
2644                                        MatZeroRowsColumns_MPIAIJ,
2645                                 /*49*/ MatSetRandom_MPIAIJ,
2646                                        0,
2647                                        0,
2648                                        0,
2649                                        0,
2650                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2651                                        0,
2652                                        MatSetUnfactored_MPIAIJ,
2653                                        MatPermute_MPIAIJ,
2654                                        0,
2655                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2656                                        MatDestroy_MPIAIJ,
2657                                        MatView_MPIAIJ,
2658                                        0,
2659                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2660                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2661                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2662                                        0,
2663                                        0,
2664                                        0,
2665                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2666                                        MatGetRowMinAbs_MPIAIJ,
2667                                        0,
2668                                        0,
2669                                        0,
2670                                        0,
2671                                 /*75*/ MatFDColoringApply_AIJ,
2672                                        MatSetFromOptions_MPIAIJ,
2673                                        0,
2674                                        0,
2675                                        MatFindZeroDiagonals_MPIAIJ,
2676                                 /*80*/ 0,
2677                                        0,
2678                                        0,
2679                                 /*83*/ MatLoad_MPIAIJ,
2680                                        MatIsSymmetric_MPIAIJ,
2681                                        0,
2682                                        0,
2683                                        0,
2684                                        0,
2685                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2686                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2687                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2688                                        MatPtAP_MPIAIJ_MPIAIJ,
2689                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2690                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2691                                        0,
2692                                        0,
2693                                        0,
2694                                        0,
2695                                 /*99*/ 0,
2696                                        0,
2697                                        0,
2698                                        MatConjugate_MPIAIJ,
2699                                        0,
2700                                 /*104*/MatSetValuesRow_MPIAIJ,
2701                                        MatRealPart_MPIAIJ,
2702                                        MatImaginaryPart_MPIAIJ,
2703                                        0,
2704                                        0,
2705                                 /*109*/0,
2706                                        0,
2707                                        MatGetRowMin_MPIAIJ,
2708                                        0,
2709                                        MatMissingDiagonal_MPIAIJ,
2710                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2711                                        0,
2712                                        MatGetGhosts_MPIAIJ,
2713                                        0,
2714                                        0,
2715                                 /*119*/0,
2716                                        0,
2717                                        0,
2718                                        0,
2719                                        MatGetMultiProcBlock_MPIAIJ,
2720                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2721                                        MatGetColumnNorms_MPIAIJ,
2722                                        MatInvertBlockDiagonal_MPIAIJ,
2723                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2724                                        MatCreateSubMatricesMPI_MPIAIJ,
2725                                 /*129*/0,
2726                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2727                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2728                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2729                                        0,
2730                                 /*134*/0,
2731                                        0,
2732                                        MatRARt_MPIAIJ_MPIAIJ,
2733                                        0,
2734                                        0,
2735                                 /*139*/MatSetBlockSizes_MPIAIJ,
2736                                        0,
2737                                        0,
2738                                        MatFDColoringSetUp_MPIXAIJ,
2739                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2740                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2741 };
2742 
2743 /* ----------------------------------------------------------------------------------------*/
2744 
2745 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2746 {
2747   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2748   PetscErrorCode ierr;
2749 
2750   PetscFunctionBegin;
2751   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2752   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2753   PetscFunctionReturn(0);
2754 }
2755 
2756 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2757 {
2758   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2759   PetscErrorCode ierr;
2760 
2761   PetscFunctionBegin;
2762   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2763   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2764   PetscFunctionReturn(0);
2765 }
2766 
2767 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2768 {
2769   Mat_MPIAIJ     *b;
2770   PetscErrorCode ierr;
2771 
2772   PetscFunctionBegin;
2773   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2774   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2775   b = (Mat_MPIAIJ*)B->data;
2776 
2777 #if defined(PETSC_USE_CTABLE)
2778   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2779 #else
2780   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2781 #endif
2782   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2783   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2784   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2785 
2786   /* Because the B will have been resized we simply destroy it and create a new one each time */
2787   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2788   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2789   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2790   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2791   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2792   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2793 
2794   if (!B->preallocated) {
2795     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2796     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2797     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2798     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2799     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2800   }
2801 
2802   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2803   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2804   B->preallocated  = PETSC_TRUE;
2805   B->was_assembled = PETSC_FALSE;
2806   B->assembled     = PETSC_FALSE;;
2807   PetscFunctionReturn(0);
2808 }
2809 
2810 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2811 {
2812   Mat_MPIAIJ     *b;
2813   PetscErrorCode ierr;
2814 
2815   PetscFunctionBegin;
2816   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2817   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2818   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2819   b = (Mat_MPIAIJ*)B->data;
2820 
2821 #if defined(PETSC_USE_CTABLE)
2822   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2823 #else
2824   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2825 #endif
2826   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2827   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2828   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2829 
2830   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2831   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2832   B->preallocated  = PETSC_TRUE;
2833   B->was_assembled = PETSC_FALSE;
2834   B->assembled = PETSC_FALSE;
2835   PetscFunctionReturn(0);
2836 }
2837 
2838 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2839 {
2840   Mat            mat;
2841   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2842   PetscErrorCode ierr;
2843 
2844   PetscFunctionBegin;
2845   *newmat = 0;
2846   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2847   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2848   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2849   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2850   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2851   a       = (Mat_MPIAIJ*)mat->data;
2852 
2853   mat->factortype   = matin->factortype;
2854   mat->assembled    = PETSC_TRUE;
2855   mat->insertmode   = NOT_SET_VALUES;
2856   mat->preallocated = PETSC_TRUE;
2857 
2858   a->size         = oldmat->size;
2859   a->rank         = oldmat->rank;
2860   a->donotstash   = oldmat->donotstash;
2861   a->roworiented  = oldmat->roworiented;
2862   a->rowindices   = 0;
2863   a->rowvalues    = 0;
2864   a->getrowactive = PETSC_FALSE;
2865 
2866   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2867   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2868 
2869   if (oldmat->colmap) {
2870 #if defined(PETSC_USE_CTABLE)
2871     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2872 #else
2873     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2874     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2875     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2876 #endif
2877   } else a->colmap = 0;
2878   if (oldmat->garray) {
2879     PetscInt len;
2880     len  = oldmat->B->cmap->n;
2881     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2882     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2883     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2884   } else a->garray = 0;
2885 
2886   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2887   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2888   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2889   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2890 
2891   if (oldmat->Mvctx_mpi1) {
2892     ierr    = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2893     ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2894   }
2895 
2896   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2897   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2898   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2899   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2900   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2901   *newmat = mat;
2902   PetscFunctionReturn(0);
2903 }
2904 
2905 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2906 {
2907   PetscScalar    *vals,*svals;
2908   MPI_Comm       comm;
2909   PetscErrorCode ierr;
2910   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2911   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2912   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2913   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2914   PetscInt       cend,cstart,n,*rowners;
2915   int            fd;
2916   PetscInt       bs = newMat->rmap->bs;
2917 
2918   PetscFunctionBegin;
2919   /* force binary viewer to load .info file if it has not yet done so */
2920   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2921   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2922   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2923   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2924   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2925   if (!rank) {
2926     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2927     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2928     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ");
2929   }
2930 
2931   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2932   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2933   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2934   if (bs < 0) bs = 1;
2935 
2936   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2937   M    = header[1]; N = header[2];
2938 
2939   /* If global sizes are set, check if they are consistent with that given in the file */
2940   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2941   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2942 
2943   /* determine ownership of all (block) rows */
2944   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2945   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2946   else m = newMat->rmap->n; /* Set by user */
2947 
2948   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2949   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2950 
2951   /* First process needs enough room for process with most rows */
2952   if (!rank) {
2953     mmax = rowners[1];
2954     for (i=2; i<=size; i++) {
2955       mmax = PetscMax(mmax, rowners[i]);
2956     }
2957   } else mmax = -1;             /* unused, but compilers complain */
2958 
2959   rowners[0] = 0;
2960   for (i=2; i<=size; i++) {
2961     rowners[i] += rowners[i-1];
2962   }
2963   rstart = rowners[rank];
2964   rend   = rowners[rank+1];
2965 
2966   /* distribute row lengths to all processors */
2967   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2968   if (!rank) {
2969     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2970     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2971     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2972     for (j=0; j<m; j++) {
2973       procsnz[0] += ourlens[j];
2974     }
2975     for (i=1; i<size; i++) {
2976       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2977       /* calculate the number of nonzeros on each processor */
2978       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2979         procsnz[i] += rowlengths[j];
2980       }
2981       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2982     }
2983     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2984   } else {
2985     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2986   }
2987 
2988   if (!rank) {
2989     /* determine max buffer needed and allocate it */
2990     maxnz = 0;
2991     for (i=0; i<size; i++) {
2992       maxnz = PetscMax(maxnz,procsnz[i]);
2993     }
2994     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2995 
2996     /* read in my part of the matrix column indices  */
2997     nz   = procsnz[0];
2998     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2999     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3000 
3001     /* read in every one elses and ship off */
3002     for (i=1; i<size; i++) {
3003       nz   = procsnz[i];
3004       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3005       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3006     }
3007     ierr = PetscFree(cols);CHKERRQ(ierr);
3008   } else {
3009     /* determine buffer space needed for message */
3010     nz = 0;
3011     for (i=0; i<m; i++) {
3012       nz += ourlens[i];
3013     }
3014     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3015 
3016     /* receive message of column indices*/
3017     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3018   }
3019 
3020   /* determine column ownership if matrix is not square */
3021   if (N != M) {
3022     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3023     else n = newMat->cmap->n;
3024     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3025     cstart = cend - n;
3026   } else {
3027     cstart = rstart;
3028     cend   = rend;
3029     n      = cend - cstart;
3030   }
3031 
3032   /* loop over local rows, determining number of off diagonal entries */
3033   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3034   jj   = 0;
3035   for (i=0; i<m; i++) {
3036     for (j=0; j<ourlens[i]; j++) {
3037       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3038       jj++;
3039     }
3040   }
3041 
3042   for (i=0; i<m; i++) {
3043     ourlens[i] -= offlens[i];
3044   }
3045   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3046 
3047   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3048 
3049   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3050 
3051   for (i=0; i<m; i++) {
3052     ourlens[i] += offlens[i];
3053   }
3054 
3055   if (!rank) {
3056     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3057 
3058     /* read in my part of the matrix numerical values  */
3059     nz   = procsnz[0];
3060     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3061 
3062     /* insert into matrix */
3063     jj      = rstart;
3064     smycols = mycols;
3065     svals   = vals;
3066     for (i=0; i<m; i++) {
3067       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3068       smycols += ourlens[i];
3069       svals   += ourlens[i];
3070       jj++;
3071     }
3072 
3073     /* read in other processors and ship out */
3074     for (i=1; i<size; i++) {
3075       nz   = procsnz[i];
3076       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3077       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3078     }
3079     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3080   } else {
3081     /* receive numeric values */
3082     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3083 
3084     /* receive message of values*/
3085     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3086 
3087     /* insert into matrix */
3088     jj      = rstart;
3089     smycols = mycols;
3090     svals   = vals;
3091     for (i=0; i<m; i++) {
3092       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3093       smycols += ourlens[i];
3094       svals   += ourlens[i];
3095       jj++;
3096     }
3097   }
3098   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3099   ierr = PetscFree(vals);CHKERRQ(ierr);
3100   ierr = PetscFree(mycols);CHKERRQ(ierr);
3101   ierr = PetscFree(rowners);CHKERRQ(ierr);
3102   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3103   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3104   PetscFunctionReturn(0);
3105 }
3106 
3107 /* Not scalable because of ISAllGather() unless getting all columns. */
3108 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3109 {
3110   PetscErrorCode ierr;
3111   IS             iscol_local;
3112   PetscBool      isstride;
3113   PetscMPIInt    lisstride=0,gisstride;
3114 
3115   PetscFunctionBegin;
3116   /* check if we are grabbing all columns*/
3117   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3118 
3119   if (isstride) {
3120     PetscInt  start,len,mstart,mlen;
3121     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3122     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3123     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3124     if (mstart == start && mlen-mstart == len) lisstride = 1;
3125   }
3126 
3127   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3128   if (gisstride) {
3129     PetscInt N;
3130     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3131     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3132     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3133     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3134   } else {
3135     PetscInt cbs;
3136     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3137     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3138     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3139   }
3140 
3141   *isseq = iscol_local;
3142   PetscFunctionReturn(0);
3143 }
3144 
3145 /*
3146  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3147  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3148 
3149  Input Parameters:
3150    mat - matrix
3151    isrow - parallel row index set; its local indices are a subset of local columns of mat,
3152            i.e., mat->rstart <= isrow[i] < mat->rend
3153    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3154            i.e., mat->cstart <= iscol[i] < mat->cend
3155  Output Parameter:
3156    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3157    iscol_o - sequential column index set for retrieving mat->B
3158    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3159  */
3160 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3161 {
3162   PetscErrorCode ierr;
3163   Vec            x,cmap;
3164   const PetscInt *is_idx;
3165   PetscScalar    *xarray,*cmaparray;
3166   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3167   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3168   Mat            B=a->B;
3169   Vec            lvec=a->lvec,lcmap;
3170   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3171   MPI_Comm       comm;
3172   VecScatter     Mvctx=a->Mvctx;
3173 
3174   PetscFunctionBegin;
3175   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3176   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3177 
3178   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3179   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3180   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3181   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3182   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3183 
3184   /* Get start indices */
3185   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3186   isstart -= ncols;
3187   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3188 
3189   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3190   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3191   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3192   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3193   for (i=0; i<ncols; i++) {
3194     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3195     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3196     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3197   }
3198   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3199   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3200   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3201 
3202   /* Get iscol_d */
3203   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3204   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3205   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3206 
3207   /* Get isrow_d */
3208   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3209   rstart = mat->rmap->rstart;
3210   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3211   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3212   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3213   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3214 
3215   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3216   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3217   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3218 
3219   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3220   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3221   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3222 
3223   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3224 
3225   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3226   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3227 
3228   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3229   /* off-process column indices */
3230   count = 0;
3231   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3232   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3233 
3234   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3235   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3236   for (i=0; i<Bn; i++) {
3237     if (PetscRealPart(xarray[i]) > -1.0) {
3238       idx[count]     = i;                   /* local column index in off-diagonal part B */
3239       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3240       count++;
3241     }
3242   }
3243   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3244   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3245 
3246   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3247   /* cannot ensure iscol_o has same blocksize as iscol! */
3248 
3249   ierr = PetscFree(idx);CHKERRQ(ierr);
3250   *garray = cmap1;
3251 
3252   ierr = VecDestroy(&x);CHKERRQ(ierr);
3253   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3254   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3255   PetscFunctionReturn(0);
3256 }
3257 
3258 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3259 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3260 {
3261   PetscErrorCode ierr;
3262   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3263   Mat            M = NULL;
3264   MPI_Comm       comm;
3265   IS             iscol_d,isrow_d,iscol_o;
3266   Mat            Asub = NULL,Bsub = NULL;
3267   PetscInt       n;
3268 
3269   PetscFunctionBegin;
3270   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3271 
3272   if (call == MAT_REUSE_MATRIX) {
3273     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3274     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3275     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3276 
3277     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3278     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3279 
3280     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3281     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3282 
3283     /* Update diagonal and off-diagonal portions of submat */
3284     asub = (Mat_MPIAIJ*)(*submat)->data;
3285     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3286     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3287     if (n) {
3288       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3289     }
3290     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3291     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3292 
3293   } else { /* call == MAT_INITIAL_MATRIX) */
3294     const PetscInt *garray;
3295     PetscInt        BsubN;
3296 
3297     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3298     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3299 
3300     /* Create local submatrices Asub and Bsub */
3301     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3302     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3303 
3304     /* Create submatrix M */
3305     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3306 
3307     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3308     asub = (Mat_MPIAIJ*)M->data;
3309 
3310     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3311     n = asub->B->cmap->N;
3312     if (BsubN > n) {
3313       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3314       const PetscInt *idx;
3315       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3316       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3317 
3318       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3319       j = 0;
3320       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3321       for (i=0; i<n; i++) {
3322         if (j >= BsubN) break;
3323         while (subgarray[i] > garray[j]) j++;
3324 
3325         if (subgarray[i] == garray[j]) {
3326           idx_new[i] = idx[j++];
3327         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]);
3328       }
3329       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3330 
3331       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3332       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3333 
3334     } else if (BsubN < n) {
3335       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N);
3336     }
3337 
3338     ierr = PetscFree(garray);CHKERRQ(ierr);
3339     *submat = M;
3340 
3341     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3342     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3343     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3344 
3345     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3346     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3347 
3348     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3349     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3350   }
3351   PetscFunctionReturn(0);
3352 }
3353 
3354 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3355 {
3356   PetscErrorCode ierr;
3357   IS             iscol_local=NULL,isrow_d;
3358   PetscInt       csize;
3359   PetscInt       n,i,j,start,end;
3360   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3361   MPI_Comm       comm;
3362 
3363   PetscFunctionBegin;
3364   /* If isrow has same processor distribution as mat,
3365      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3366   if (call == MAT_REUSE_MATRIX) {
3367     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3368     if (isrow_d) {
3369       sameRowDist  = PETSC_TRUE;
3370       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3371     } else {
3372       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3373       if (iscol_local) {
3374         sameRowDist  = PETSC_TRUE;
3375         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3376       }
3377     }
3378   } else {
3379     /* Check if isrow has same processor distribution as mat */
3380     sameDist[0] = PETSC_FALSE;
3381     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3382     if (!n) {
3383       sameDist[0] = PETSC_TRUE;
3384     } else {
3385       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3386       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3387       if (i >= start && j < end) {
3388         sameDist[0] = PETSC_TRUE;
3389       }
3390     }
3391 
3392     /* Check if iscol has same processor distribution as mat */
3393     sameDist[1] = PETSC_FALSE;
3394     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3395     if (!n) {
3396       sameDist[1] = PETSC_TRUE;
3397     } else {
3398       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3399       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3400       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3401     }
3402 
3403     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3404     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3405     sameRowDist = tsameDist[0];
3406   }
3407 
3408   if (sameRowDist) {
3409     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3410       /* isrow and iscol have same processor distribution as mat */
3411       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3412       PetscFunctionReturn(0);
3413     } else { /* sameRowDist */
3414       /* isrow has same processor distribution as mat */
3415       if (call == MAT_INITIAL_MATRIX) {
3416         PetscBool sorted;
3417         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3418         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3419         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3420         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i);
3421 
3422         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3423         if (sorted) {
3424           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3425           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3426           PetscFunctionReturn(0);
3427         }
3428       } else { /* call == MAT_REUSE_MATRIX */
3429         IS    iscol_sub;
3430         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3431         if (iscol_sub) {
3432           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3433           PetscFunctionReturn(0);
3434         }
3435       }
3436     }
3437   }
3438 
3439   /* General case: iscol -> iscol_local which has global size of iscol */
3440   if (call == MAT_REUSE_MATRIX) {
3441     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3442     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3443   } else {
3444     if (!iscol_local) {
3445       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3446     }
3447   }
3448 
3449   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3450   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3451 
3452   if (call == MAT_INITIAL_MATRIX) {
3453     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3454     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3455   }
3456   PetscFunctionReturn(0);
3457 }
3458 
3459 /*@C
3460      MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3461          and "off-diagonal" part of the matrix in CSR format.
3462 
3463    Collective on MPI_Comm
3464 
3465    Input Parameters:
3466 +  comm - MPI communicator
3467 .  A - "diagonal" portion of matrix
3468 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3469 -  garray - global index of B columns
3470 
3471    Output Parameter:
3472 .   mat - the matrix, with input A as its local diagonal matrix
3473    Level: advanced
3474 
3475    Notes:
3476        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3477        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3478 
3479 .seealso: MatCreateMPIAIJWithSplitArrays()
3480 @*/
3481 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3482 {
3483   PetscErrorCode ierr;
3484   Mat_MPIAIJ     *maij;
3485   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3486   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3487   PetscScalar    *oa=b->a;
3488   Mat            Bnew;
3489   PetscInt       m,n,N;
3490 
3491   PetscFunctionBegin;
3492   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3493   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3494   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3495   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3496   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3497   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3498 
3499   /* Get global columns of mat */
3500   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3501 
3502   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3503   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3504   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3505   maij = (Mat_MPIAIJ*)(*mat)->data;
3506 
3507   (*mat)->preallocated = PETSC_TRUE;
3508 
3509   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3510   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3511 
3512   /* Set A as diagonal portion of *mat */
3513   maij->A = A;
3514 
3515   nz = oi[m];
3516   for (i=0; i<nz; i++) {
3517     col   = oj[i];
3518     oj[i] = garray[col];
3519   }
3520 
3521    /* Set Bnew as off-diagonal portion of *mat */
3522   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3523   bnew        = (Mat_SeqAIJ*)Bnew->data;
3524   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3525   maij->B     = Bnew;
3526 
3527   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N);
3528 
3529   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3530   b->free_a       = PETSC_FALSE;
3531   b->free_ij      = PETSC_FALSE;
3532   ierr = MatDestroy(&B);CHKERRQ(ierr);
3533 
3534   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3535   bnew->free_a       = PETSC_TRUE;
3536   bnew->free_ij      = PETSC_TRUE;
3537 
3538   /* condense columns of maij->B */
3539   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3540   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3541   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3542   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3543   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3544   PetscFunctionReturn(0);
3545 }
3546 
3547 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3548 
3549 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3550 {
3551   PetscErrorCode ierr;
3552   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3553   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3554   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3555   Mat            M,Msub,B=a->B;
3556   MatScalar      *aa;
3557   Mat_SeqAIJ     *aij;
3558   PetscInt       *garray = a->garray,*colsub,Ncols;
3559   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3560   IS             iscol_sub,iscmap;
3561   const PetscInt *is_idx,*cmap;
3562   PetscBool      allcolumns=PETSC_FALSE;
3563   MPI_Comm       comm;
3564 
3565   PetscFunctionBegin;
3566   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3567 
3568   if (call == MAT_REUSE_MATRIX) {
3569     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3570     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3571     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3572 
3573     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3574     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3575 
3576     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3577     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3578 
3579     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3580 
3581   } else { /* call == MAT_INITIAL_MATRIX) */
3582     PetscBool flg;
3583 
3584     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3585     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3586 
3587     /* (1) iscol -> nonscalable iscol_local */
3588     /* Check for special case: each processor gets entire matrix columns */
3589     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3590     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3591     if (allcolumns) {
3592       iscol_sub = iscol_local;
3593       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3594       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3595 
3596     } else {
3597       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3598       PetscInt *idx,*cmap1,k;
3599       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3600       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3601       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3602       count = 0;
3603       k     = 0;
3604       for (i=0; i<Ncols; i++) {
3605         j = is_idx[i];
3606         if (j >= cstart && j < cend) {
3607           /* diagonal part of mat */
3608           idx[count]     = j;
3609           cmap1[count++] = i; /* column index in submat */
3610         } else if (Bn) {
3611           /* off-diagonal part of mat */
3612           if (j == garray[k]) {
3613             idx[count]     = j;
3614             cmap1[count++] = i;  /* column index in submat */
3615           } else if (j > garray[k]) {
3616             while (j > garray[k] && k < Bn-1) k++;
3617             if (j == garray[k]) {
3618               idx[count]     = j;
3619               cmap1[count++] = i; /* column index in submat */
3620             }
3621           }
3622         }
3623       }
3624       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3625 
3626       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3627       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3628       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3629 
3630       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3631     }
3632 
3633     /* (3) Create sequential Msub */
3634     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3635   }
3636 
3637   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3638   aij  = (Mat_SeqAIJ*)(Msub)->data;
3639   ii   = aij->i;
3640   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3641 
3642   /*
3643       m - number of local rows
3644       Ncols - number of columns (same on all processors)
3645       rstart - first row in new global matrix generated
3646   */
3647   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3648 
3649   if (call == MAT_INITIAL_MATRIX) {
3650     /* (4) Create parallel newmat */
3651     PetscMPIInt    rank,size;
3652     PetscInt       csize;
3653 
3654     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3655     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3656 
3657     /*
3658         Determine the number of non-zeros in the diagonal and off-diagonal
3659         portions of the matrix in order to do correct preallocation
3660     */
3661 
3662     /* first get start and end of "diagonal" columns */
3663     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3664     if (csize == PETSC_DECIDE) {
3665       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3666       if (mglobal == Ncols) { /* square matrix */
3667         nlocal = m;
3668       } else {
3669         nlocal = Ncols/size + ((Ncols % size) > rank);
3670       }
3671     } else {
3672       nlocal = csize;
3673     }
3674     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3675     rstart = rend - nlocal;
3676     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3677 
3678     /* next, compute all the lengths */
3679     jj    = aij->j;
3680     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3681     olens = dlens + m;
3682     for (i=0; i<m; i++) {
3683       jend = ii[i+1] - ii[i];
3684       olen = 0;
3685       dlen = 0;
3686       for (j=0; j<jend; j++) {
3687         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3688         else dlen++;
3689         jj++;
3690       }
3691       olens[i] = olen;
3692       dlens[i] = dlen;
3693     }
3694 
3695     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3696     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3697 
3698     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3699     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3700     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3701     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3702     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3703     ierr = PetscFree(dlens);CHKERRQ(ierr);
3704 
3705   } else { /* call == MAT_REUSE_MATRIX */
3706     M    = *newmat;
3707     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3708     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3709     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3710     /*
3711          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3712        rather than the slower MatSetValues().
3713     */
3714     M->was_assembled = PETSC_TRUE;
3715     M->assembled     = PETSC_FALSE;
3716   }
3717 
3718   /* (5) Set values of Msub to *newmat */
3719   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3720   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3721 
3722   jj   = aij->j;
3723   aa   = aij->a;
3724   for (i=0; i<m; i++) {
3725     row = rstart + i;
3726     nz  = ii[i+1] - ii[i];
3727     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3728     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3729     jj += nz; aa += nz;
3730   }
3731   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3732 
3733   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3734   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3735 
3736   ierr = PetscFree(colsub);CHKERRQ(ierr);
3737 
3738   /* save Msub, iscol_sub and iscmap used in processor for next request */
3739   if (call ==  MAT_INITIAL_MATRIX) {
3740     *newmat = M;
3741     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3742     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3743 
3744     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3745     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3746 
3747     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3748     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3749 
3750     if (iscol_local) {
3751       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3752       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3753     }
3754   }
3755   PetscFunctionReturn(0);
3756 }
3757 
3758 /*
3759     Not great since it makes two copies of the submatrix, first an SeqAIJ
3760   in local and then by concatenating the local matrices the end result.
3761   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3762 
3763   Note: This requires a sequential iscol with all indices.
3764 */
3765 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3766 {
3767   PetscErrorCode ierr;
3768   PetscMPIInt    rank,size;
3769   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3770   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3771   Mat            M,Mreuse;
3772   MatScalar      *aa,*vwork;
3773   MPI_Comm       comm;
3774   Mat_SeqAIJ     *aij;
3775   PetscBool      colflag,allcolumns=PETSC_FALSE;
3776 
3777   PetscFunctionBegin;
3778   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3779   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3780   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3781 
3782   /* Check for special case: each processor gets entire matrix columns */
3783   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3784   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3785   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3786 
3787   if (call ==  MAT_REUSE_MATRIX) {
3788     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3789     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3790     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3791   } else {
3792     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3793   }
3794 
3795   /*
3796       m - number of local rows
3797       n - number of columns (same on all processors)
3798       rstart - first row in new global matrix generated
3799   */
3800   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3801   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3802   if (call == MAT_INITIAL_MATRIX) {
3803     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3804     ii  = aij->i;
3805     jj  = aij->j;
3806 
3807     /*
3808         Determine the number of non-zeros in the diagonal and off-diagonal
3809         portions of the matrix in order to do correct preallocation
3810     */
3811 
3812     /* first get start and end of "diagonal" columns */
3813     if (csize == PETSC_DECIDE) {
3814       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3815       if (mglobal == n) { /* square matrix */
3816         nlocal = m;
3817       } else {
3818         nlocal = n/size + ((n % size) > rank);
3819       }
3820     } else {
3821       nlocal = csize;
3822     }
3823     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3824     rstart = rend - nlocal;
3825     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3826 
3827     /* next, compute all the lengths */
3828     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3829     olens = dlens + m;
3830     for (i=0; i<m; i++) {
3831       jend = ii[i+1] - ii[i];
3832       olen = 0;
3833       dlen = 0;
3834       for (j=0; j<jend; j++) {
3835         if (*jj < rstart || *jj >= rend) olen++;
3836         else dlen++;
3837         jj++;
3838       }
3839       olens[i] = olen;
3840       dlens[i] = dlen;
3841     }
3842     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3843     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3844     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3845     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3846     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3847     ierr = PetscFree(dlens);CHKERRQ(ierr);
3848   } else {
3849     PetscInt ml,nl;
3850 
3851     M    = *newmat;
3852     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3853     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3854     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3855     /*
3856          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3857        rather than the slower MatSetValues().
3858     */
3859     M->was_assembled = PETSC_TRUE;
3860     M->assembled     = PETSC_FALSE;
3861   }
3862   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3863   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3864   ii   = aij->i;
3865   jj   = aij->j;
3866   aa   = aij->a;
3867   for (i=0; i<m; i++) {
3868     row   = rstart + i;
3869     nz    = ii[i+1] - ii[i];
3870     cwork = jj;     jj += nz;
3871     vwork = aa;     aa += nz;
3872     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3873   }
3874 
3875   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3876   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3877   *newmat = M;
3878 
3879   /* save submatrix used in processor for next request */
3880   if (call ==  MAT_INITIAL_MATRIX) {
3881     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3882     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3883   }
3884   PetscFunctionReturn(0);
3885 }
3886 
3887 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3888 {
3889   PetscInt       m,cstart, cend,j,nnz,i,d;
3890   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3891   const PetscInt *JJ;
3892   PetscScalar    *values;
3893   PetscErrorCode ierr;
3894   PetscBool      nooffprocentries;
3895 
3896   PetscFunctionBegin;
3897   if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3898 
3899   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3900   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3901   m      = B->rmap->n;
3902   cstart = B->cmap->rstart;
3903   cend   = B->cmap->rend;
3904   rstart = B->rmap->rstart;
3905 
3906   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3907 
3908 #if defined(PETSC_USE_DEBUG)
3909   for (i=0; i<m; i++) {
3910     nnz = Ii[i+1]- Ii[i];
3911     JJ  = J + Ii[i];
3912     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3913     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]);
3914     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3915   }
3916 #endif
3917 
3918   for (i=0; i<m; i++) {
3919     nnz     = Ii[i+1]- Ii[i];
3920     JJ      = J + Ii[i];
3921     nnz_max = PetscMax(nnz_max,nnz);
3922     d       = 0;
3923     for (j=0; j<nnz; j++) {
3924       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3925     }
3926     d_nnz[i] = d;
3927     o_nnz[i] = nnz - d;
3928   }
3929   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3930   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3931 
3932   if (v) values = (PetscScalar*)v;
3933   else {
3934     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3935   }
3936 
3937   for (i=0; i<m; i++) {
3938     ii   = i + rstart;
3939     nnz  = Ii[i+1]- Ii[i];
3940     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3941   }
3942   nooffprocentries    = B->nooffprocentries;
3943   B->nooffprocentries = PETSC_TRUE;
3944   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3945   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3946   B->nooffprocentries = nooffprocentries;
3947 
3948   if (!v) {
3949     ierr = PetscFree(values);CHKERRQ(ierr);
3950   }
3951   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3952   PetscFunctionReturn(0);
3953 }
3954 
3955 /*@
3956    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3957    (the default parallel PETSc format).
3958 
3959    Collective on MPI_Comm
3960 
3961    Input Parameters:
3962 +  B - the matrix
3963 .  i - the indices into j for the start of each local row (starts with zero)
3964 .  j - the column indices for each local row (starts with zero)
3965 -  v - optional values in the matrix
3966 
3967    Level: developer
3968 
3969    Notes:
3970        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3971      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3972      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3973 
3974        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3975 
3976        The format which is used for the sparse matrix input, is equivalent to a
3977     row-major ordering.. i.e for the following matrix, the input data expected is
3978     as shown
3979 
3980 $        1 0 0
3981 $        2 0 3     P0
3982 $       -------
3983 $        4 5 6     P1
3984 $
3985 $     Process0 [P0]: rows_owned=[0,1]
3986 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3987 $        j =  {0,0,2}  [size = 3]
3988 $        v =  {1,2,3}  [size = 3]
3989 $
3990 $     Process1 [P1]: rows_owned=[2]
3991 $        i =  {0,3}    [size = nrow+1  = 1+1]
3992 $        j =  {0,1,2}  [size = 3]
3993 $        v =  {4,5,6}  [size = 3]
3994 
3995 .keywords: matrix, aij, compressed row, sparse, parallel
3996 
3997 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3998           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3999 @*/
4000 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
4001 {
4002   PetscErrorCode ierr;
4003 
4004   PetscFunctionBegin;
4005   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
4006   PetscFunctionReturn(0);
4007 }
4008 
4009 /*@C
4010    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4011    (the default parallel PETSc format).  For good matrix assembly performance
4012    the user should preallocate the matrix storage by setting the parameters
4013    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4014    performance can be increased by more than a factor of 50.
4015 
4016    Collective on MPI_Comm
4017 
4018    Input Parameters:
4019 +  B - the matrix
4020 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4021            (same value is used for all local rows)
4022 .  d_nnz - array containing the number of nonzeros in the various rows of the
4023            DIAGONAL portion of the local submatrix (possibly different for each row)
4024            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4025            The size of this array is equal to the number of local rows, i.e 'm'.
4026            For matrices that will be factored, you must leave room for (and set)
4027            the diagonal entry even if it is zero.
4028 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4029            submatrix (same value is used for all local rows).
4030 -  o_nnz - array containing the number of nonzeros in the various rows of the
4031            OFF-DIAGONAL portion of the local submatrix (possibly different for
4032            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4033            structure. The size of this array is equal to the number
4034            of local rows, i.e 'm'.
4035 
4036    If the *_nnz parameter is given then the *_nz parameter is ignored
4037 
4038    The AIJ format (also called the Yale sparse matrix format or
4039    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4040    storage.  The stored row and column indices begin with zero.
4041    See Users-Manual: ch_mat for details.
4042 
4043    The parallel matrix is partitioned such that the first m0 rows belong to
4044    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4045    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4046 
4047    The DIAGONAL portion of the local submatrix of a processor can be defined
4048    as the submatrix which is obtained by extraction the part corresponding to
4049    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4050    first row that belongs to the processor, r2 is the last row belonging to
4051    the this processor, and c1-c2 is range of indices of the local part of a
4052    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4053    common case of a square matrix, the row and column ranges are the same and
4054    the DIAGONAL part is also square. The remaining portion of the local
4055    submatrix (mxN) constitute the OFF-DIAGONAL portion.
4056 
4057    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4058 
4059    You can call MatGetInfo() to get information on how effective the preallocation was;
4060    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4061    You can also run with the option -info and look for messages with the string
4062    malloc in them to see if additional memory allocation was needed.
4063 
4064    Example usage:
4065 
4066    Consider the following 8x8 matrix with 34 non-zero values, that is
4067    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4068    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4069    as follows:
4070 
4071 .vb
4072             1  2  0  |  0  3  0  |  0  4
4073     Proc0   0  5  6  |  7  0  0  |  8  0
4074             9  0 10  | 11  0  0  | 12  0
4075     -------------------------------------
4076            13  0 14  | 15 16 17  |  0  0
4077     Proc1   0 18  0  | 19 20 21  |  0  0
4078             0  0  0  | 22 23  0  | 24  0
4079     -------------------------------------
4080     Proc2  25 26 27  |  0  0 28  | 29  0
4081            30  0  0  | 31 32 33  |  0 34
4082 .ve
4083 
4084    This can be represented as a collection of submatrices as:
4085 
4086 .vb
4087       A B C
4088       D E F
4089       G H I
4090 .ve
4091 
4092    Where the submatrices A,B,C are owned by proc0, D,E,F are
4093    owned by proc1, G,H,I are owned by proc2.
4094 
4095    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4096    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4097    The 'M','N' parameters are 8,8, and have the same values on all procs.
4098 
4099    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4100    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4101    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4102    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4103    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4104    matrix, ans [DF] as another SeqAIJ matrix.
4105 
4106    When d_nz, o_nz parameters are specified, d_nz storage elements are
4107    allocated for every row of the local diagonal submatrix, and o_nz
4108    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4109    One way to choose d_nz and o_nz is to use the max nonzerors per local
4110    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4111    In this case, the values of d_nz,o_nz are:
4112 .vb
4113      proc0 : dnz = 2, o_nz = 2
4114      proc1 : dnz = 3, o_nz = 2
4115      proc2 : dnz = 1, o_nz = 4
4116 .ve
4117    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4118    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4119    for proc3. i.e we are using 12+15+10=37 storage locations to store
4120    34 values.
4121 
4122    When d_nnz, o_nnz parameters are specified, the storage is specified
4123    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4124    In the above case the values for d_nnz,o_nnz are:
4125 .vb
4126      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4127      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4128      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4129 .ve
4130    Here the space allocated is sum of all the above values i.e 34, and
4131    hence pre-allocation is perfect.
4132 
4133    Level: intermediate
4134 
4135 .keywords: matrix, aij, compressed row, sparse, parallel
4136 
4137 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4138           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4139 @*/
4140 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4141 {
4142   PetscErrorCode ierr;
4143 
4144   PetscFunctionBegin;
4145   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4146   PetscValidType(B,1);
4147   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4148   PetscFunctionReturn(0);
4149 }
4150 
4151 /*@
4152      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4153          CSR format the local rows.
4154 
4155    Collective on MPI_Comm
4156 
4157    Input Parameters:
4158 +  comm - MPI communicator
4159 .  m - number of local rows (Cannot be PETSC_DECIDE)
4160 .  n - This value should be the same as the local size used in creating the
4161        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4162        calculated if N is given) For square matrices n is almost always m.
4163 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4164 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4165 .   i - row indices
4166 .   j - column indices
4167 -   a - matrix values
4168 
4169    Output Parameter:
4170 .   mat - the matrix
4171 
4172    Level: intermediate
4173 
4174    Notes:
4175        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4176      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4177      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4178 
4179        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4180 
4181        The format which is used for the sparse matrix input, is equivalent to a
4182     row-major ordering.. i.e for the following matrix, the input data expected is
4183     as shown
4184 
4185 $        1 0 0
4186 $        2 0 3     P0
4187 $       -------
4188 $        4 5 6     P1
4189 $
4190 $     Process0 [P0]: rows_owned=[0,1]
4191 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4192 $        j =  {0,0,2}  [size = 3]
4193 $        v =  {1,2,3}  [size = 3]
4194 $
4195 $     Process1 [P1]: rows_owned=[2]
4196 $        i =  {0,3}    [size = nrow+1  = 1+1]
4197 $        j =  {0,1,2}  [size = 3]
4198 $        v =  {4,5,6}  [size = 3]
4199 
4200 .keywords: matrix, aij, compressed row, sparse, parallel
4201 
4202 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4203           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4204 @*/
4205 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4206 {
4207   PetscErrorCode ierr;
4208 
4209   PetscFunctionBegin;
4210   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4211   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4212   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4213   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4214   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4215   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4216   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4217   PetscFunctionReturn(0);
4218 }
4219 
4220 /*@C
4221    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4222    (the default parallel PETSc format).  For good matrix assembly performance
4223    the user should preallocate the matrix storage by setting the parameters
4224    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4225    performance can be increased by more than a factor of 50.
4226 
4227    Collective on MPI_Comm
4228 
4229    Input Parameters:
4230 +  comm - MPI communicator
4231 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4232            This value should be the same as the local size used in creating the
4233            y vector for the matrix-vector product y = Ax.
4234 .  n - This value should be the same as the local size used in creating the
4235        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4236        calculated if N is given) For square matrices n is almost always m.
4237 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4238 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4239 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4240            (same value is used for all local rows)
4241 .  d_nnz - array containing the number of nonzeros in the various rows of the
4242            DIAGONAL portion of the local submatrix (possibly different for each row)
4243            or NULL, if d_nz is used to specify the nonzero structure.
4244            The size of this array is equal to the number of local rows, i.e 'm'.
4245 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4246            submatrix (same value is used for all local rows).
4247 -  o_nnz - array containing the number of nonzeros in the various rows of the
4248            OFF-DIAGONAL portion of the local submatrix (possibly different for
4249            each row) or NULL, if o_nz is used to specify the nonzero
4250            structure. The size of this array is equal to the number
4251            of local rows, i.e 'm'.
4252 
4253    Output Parameter:
4254 .  A - the matrix
4255 
4256    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4257    MatXXXXSetPreallocation() paradgm instead of this routine directly.
4258    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4259 
4260    Notes:
4261    If the *_nnz parameter is given then the *_nz parameter is ignored
4262 
4263    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4264    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4265    storage requirements for this matrix.
4266 
4267    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4268    processor than it must be used on all processors that share the object for
4269    that argument.
4270 
4271    The user MUST specify either the local or global matrix dimensions
4272    (possibly both).
4273 
4274    The parallel matrix is partitioned across processors such that the
4275    first m0 rows belong to process 0, the next m1 rows belong to
4276    process 1, the next m2 rows belong to process 2 etc.. where
4277    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4278    values corresponding to [m x N] submatrix.
4279 
4280    The columns are logically partitioned with the n0 columns belonging
4281    to 0th partition, the next n1 columns belonging to the next
4282    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4283 
4284    The DIAGONAL portion of the local submatrix on any given processor
4285    is the submatrix corresponding to the rows and columns m,n
4286    corresponding to the given processor. i.e diagonal matrix on
4287    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4288    etc. The remaining portion of the local submatrix [m x (N-n)]
4289    constitute the OFF-DIAGONAL portion. The example below better
4290    illustrates this concept.
4291 
4292    For a square global matrix we define each processor's diagonal portion
4293    to be its local rows and the corresponding columns (a square submatrix);
4294    each processor's off-diagonal portion encompasses the remainder of the
4295    local matrix (a rectangular submatrix).
4296 
4297    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4298 
4299    When calling this routine with a single process communicator, a matrix of
4300    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4301    type of communicator, use the construction mechanism
4302 .vb
4303      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4304 .ve
4305 
4306 $     MatCreate(...,&A);
4307 $     MatSetType(A,MATMPIAIJ);
4308 $     MatSetSizes(A, m,n,M,N);
4309 $     MatMPIAIJSetPreallocation(A,...);
4310 
4311    By default, this format uses inodes (identical nodes) when possible.
4312    We search for consecutive rows with the same nonzero structure, thereby
4313    reusing matrix information to achieve increased efficiency.
4314 
4315    Options Database Keys:
4316 +  -mat_no_inode  - Do not use inodes
4317 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4318 
4319 
4320 
4321    Example usage:
4322 
4323    Consider the following 8x8 matrix with 34 non-zero values, that is
4324    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4325    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4326    as follows
4327 
4328 .vb
4329             1  2  0  |  0  3  0  |  0  4
4330     Proc0   0  5  6  |  7  0  0  |  8  0
4331             9  0 10  | 11  0  0  | 12  0
4332     -------------------------------------
4333            13  0 14  | 15 16 17  |  0  0
4334     Proc1   0 18  0  | 19 20 21  |  0  0
4335             0  0  0  | 22 23  0  | 24  0
4336     -------------------------------------
4337     Proc2  25 26 27  |  0  0 28  | 29  0
4338            30  0  0  | 31 32 33  |  0 34
4339 .ve
4340 
4341    This can be represented as a collection of submatrices as
4342 
4343 .vb
4344       A B C
4345       D E F
4346       G H I
4347 .ve
4348 
4349    Where the submatrices A,B,C are owned by proc0, D,E,F are
4350    owned by proc1, G,H,I are owned by proc2.
4351 
4352    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4353    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4354    The 'M','N' parameters are 8,8, and have the same values on all procs.
4355 
4356    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4357    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4358    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4359    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4360    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4361    matrix, ans [DF] as another SeqAIJ matrix.
4362 
4363    When d_nz, o_nz parameters are specified, d_nz storage elements are
4364    allocated for every row of the local diagonal submatrix, and o_nz
4365    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4366    One way to choose d_nz and o_nz is to use the max nonzerors per local
4367    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4368    In this case, the values of d_nz,o_nz are
4369 .vb
4370      proc0 : dnz = 2, o_nz = 2
4371      proc1 : dnz = 3, o_nz = 2
4372      proc2 : dnz = 1, o_nz = 4
4373 .ve
4374    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4375    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4376    for proc3. i.e we are using 12+15+10=37 storage locations to store
4377    34 values.
4378 
4379    When d_nnz, o_nnz parameters are specified, the storage is specified
4380    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4381    In the above case the values for d_nnz,o_nnz are
4382 .vb
4383      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4384      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4385      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4386 .ve
4387    Here the space allocated is sum of all the above values i.e 34, and
4388    hence pre-allocation is perfect.
4389 
4390    Level: intermediate
4391 
4392 .keywords: matrix, aij, compressed row, sparse, parallel
4393 
4394 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4395           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4396 @*/
4397 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4398 {
4399   PetscErrorCode ierr;
4400   PetscMPIInt    size;
4401 
4402   PetscFunctionBegin;
4403   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4404   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4405   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4406   if (size > 1) {
4407     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4408     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4409   } else {
4410     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4411     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4412   }
4413   PetscFunctionReturn(0);
4414 }
4415 
4416 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4417 {
4418   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4419   PetscBool      flg;
4420   PetscErrorCode ierr;
4421 
4422   PetscFunctionBegin;
4423   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4424   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4425   if (Ad)     *Ad     = a->A;
4426   if (Ao)     *Ao     = a->B;
4427   if (colmap) *colmap = a->garray;
4428   PetscFunctionReturn(0);
4429 }
4430 
4431 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4432 {
4433   PetscErrorCode ierr;
4434   PetscInt       m,N,i,rstart,nnz,Ii;
4435   PetscInt       *indx;
4436   PetscScalar    *values;
4437 
4438   PetscFunctionBegin;
4439   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4440   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4441     PetscInt       *dnz,*onz,sum,bs,cbs;
4442 
4443     if (n == PETSC_DECIDE) {
4444       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4445     }
4446     /* Check sum(n) = N */
4447     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4448     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4449 
4450     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4451     rstart -= m;
4452 
4453     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4454     for (i=0; i<m; i++) {
4455       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4456       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4457       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4458     }
4459 
4460     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4461     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4462     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4463     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4464     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4465     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4466     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4467     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4468   }
4469 
4470   /* numeric phase */
4471   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4472   for (i=0; i<m; i++) {
4473     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4474     Ii   = i + rstart;
4475     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4476     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4477   }
4478   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4479   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4480   PetscFunctionReturn(0);
4481 }
4482 
4483 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4484 {
4485   PetscErrorCode    ierr;
4486   PetscMPIInt       rank;
4487   PetscInt          m,N,i,rstart,nnz;
4488   size_t            len;
4489   const PetscInt    *indx;
4490   PetscViewer       out;
4491   char              *name;
4492   Mat               B;
4493   const PetscScalar *values;
4494 
4495   PetscFunctionBegin;
4496   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4497   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4498   /* Should this be the type of the diagonal block of A? */
4499   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4500   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4501   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4502   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4503   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4504   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4505   for (i=0; i<m; i++) {
4506     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4507     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4508     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4509   }
4510   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4511   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4512 
4513   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4514   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4515   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4516   sprintf(name,"%s.%d",outfile,rank);
4517   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4518   ierr = PetscFree(name);CHKERRQ(ierr);
4519   ierr = MatView(B,out);CHKERRQ(ierr);
4520   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4521   ierr = MatDestroy(&B);CHKERRQ(ierr);
4522   PetscFunctionReturn(0);
4523 }
4524 
4525 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4526 {
4527   PetscErrorCode      ierr;
4528   Mat_Merge_SeqsToMPI *merge;
4529   PetscContainer      container;
4530 
4531   PetscFunctionBegin;
4532   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4533   if (container) {
4534     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4535     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4536     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4537     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4538     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4539     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4540     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4541     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4542     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4543     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4544     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4545     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4546     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4547     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4548     ierr = PetscFree(merge);CHKERRQ(ierr);
4549     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4550   }
4551   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4552   PetscFunctionReturn(0);
4553 }
4554 
4555 #include <../src/mat/utils/freespace.h>
4556 #include <petscbt.h>
4557 
4558 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4559 {
4560   PetscErrorCode      ierr;
4561   MPI_Comm            comm;
4562   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4563   PetscMPIInt         size,rank,taga,*len_s;
4564   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4565   PetscInt            proc,m;
4566   PetscInt            **buf_ri,**buf_rj;
4567   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4568   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4569   MPI_Request         *s_waits,*r_waits;
4570   MPI_Status          *status;
4571   MatScalar           *aa=a->a;
4572   MatScalar           **abuf_r,*ba_i;
4573   Mat_Merge_SeqsToMPI *merge;
4574   PetscContainer      container;
4575 
4576   PetscFunctionBegin;
4577   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4578   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4579 
4580   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4581   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4582 
4583   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4584   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4585 
4586   bi     = merge->bi;
4587   bj     = merge->bj;
4588   buf_ri = merge->buf_ri;
4589   buf_rj = merge->buf_rj;
4590 
4591   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4592   owners = merge->rowmap->range;
4593   len_s  = merge->len_s;
4594 
4595   /* send and recv matrix values */
4596   /*-----------------------------*/
4597   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4598   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4599 
4600   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4601   for (proc=0,k=0; proc<size; proc++) {
4602     if (!len_s[proc]) continue;
4603     i    = owners[proc];
4604     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4605     k++;
4606   }
4607 
4608   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4609   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4610   ierr = PetscFree(status);CHKERRQ(ierr);
4611 
4612   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4613   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4614 
4615   /* insert mat values of mpimat */
4616   /*----------------------------*/
4617   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4618   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4619 
4620   for (k=0; k<merge->nrecv; k++) {
4621     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4622     nrows       = *(buf_ri_k[k]);
4623     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4624     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4625   }
4626 
4627   /* set values of ba */
4628   m = merge->rowmap->n;
4629   for (i=0; i<m; i++) {
4630     arow = owners[rank] + i;
4631     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4632     bnzi = bi[i+1] - bi[i];
4633     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4634 
4635     /* add local non-zero vals of this proc's seqmat into ba */
4636     anzi   = ai[arow+1] - ai[arow];
4637     aj     = a->j + ai[arow];
4638     aa     = a->a + ai[arow];
4639     nextaj = 0;
4640     for (j=0; nextaj<anzi; j++) {
4641       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4642         ba_i[j] += aa[nextaj++];
4643       }
4644     }
4645 
4646     /* add received vals into ba */
4647     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4648       /* i-th row */
4649       if (i == *nextrow[k]) {
4650         anzi   = *(nextai[k]+1) - *nextai[k];
4651         aj     = buf_rj[k] + *(nextai[k]);
4652         aa     = abuf_r[k] + *(nextai[k]);
4653         nextaj = 0;
4654         for (j=0; nextaj<anzi; j++) {
4655           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4656             ba_i[j] += aa[nextaj++];
4657           }
4658         }
4659         nextrow[k]++; nextai[k]++;
4660       }
4661     }
4662     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4663   }
4664   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4665   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4666 
4667   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4668   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4669   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4670   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4671   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4672   PetscFunctionReturn(0);
4673 }
4674 
4675 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4676 {
4677   PetscErrorCode      ierr;
4678   Mat                 B_mpi;
4679   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4680   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4681   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4682   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4683   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4684   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4685   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4686   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4687   MPI_Status          *status;
4688   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4689   PetscBT             lnkbt;
4690   Mat_Merge_SeqsToMPI *merge;
4691   PetscContainer      container;
4692 
4693   PetscFunctionBegin;
4694   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4695 
4696   /* make sure it is a PETSc comm */
4697   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4698   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4699   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4700 
4701   ierr = PetscNew(&merge);CHKERRQ(ierr);
4702   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4703 
4704   /* determine row ownership */
4705   /*---------------------------------------------------------*/
4706   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4707   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4708   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4709   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4710   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4711   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4712   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4713 
4714   m      = merge->rowmap->n;
4715   owners = merge->rowmap->range;
4716 
4717   /* determine the number of messages to send, their lengths */
4718   /*---------------------------------------------------------*/
4719   len_s = merge->len_s;
4720 
4721   len          = 0; /* length of buf_si[] */
4722   merge->nsend = 0;
4723   for (proc=0; proc<size; proc++) {
4724     len_si[proc] = 0;
4725     if (proc == rank) {
4726       len_s[proc] = 0;
4727     } else {
4728       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4729       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4730     }
4731     if (len_s[proc]) {
4732       merge->nsend++;
4733       nrows = 0;
4734       for (i=owners[proc]; i<owners[proc+1]; i++) {
4735         if (ai[i+1] > ai[i]) nrows++;
4736       }
4737       len_si[proc] = 2*(nrows+1);
4738       len         += len_si[proc];
4739     }
4740   }
4741 
4742   /* determine the number and length of messages to receive for ij-structure */
4743   /*-------------------------------------------------------------------------*/
4744   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4745   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4746 
4747   /* post the Irecv of j-structure */
4748   /*-------------------------------*/
4749   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4750   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4751 
4752   /* post the Isend of j-structure */
4753   /*--------------------------------*/
4754   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4755 
4756   for (proc=0, k=0; proc<size; proc++) {
4757     if (!len_s[proc]) continue;
4758     i    = owners[proc];
4759     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4760     k++;
4761   }
4762 
4763   /* receives and sends of j-structure are complete */
4764   /*------------------------------------------------*/
4765   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4766   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4767 
4768   /* send and recv i-structure */
4769   /*---------------------------*/
4770   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4771   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4772 
4773   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4774   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4775   for (proc=0,k=0; proc<size; proc++) {
4776     if (!len_s[proc]) continue;
4777     /* form outgoing message for i-structure:
4778          buf_si[0]:                 nrows to be sent
4779                [1:nrows]:           row index (global)
4780                [nrows+1:2*nrows+1]: i-structure index
4781     */
4782     /*-------------------------------------------*/
4783     nrows       = len_si[proc]/2 - 1;
4784     buf_si_i    = buf_si + nrows+1;
4785     buf_si[0]   = nrows;
4786     buf_si_i[0] = 0;
4787     nrows       = 0;
4788     for (i=owners[proc]; i<owners[proc+1]; i++) {
4789       anzi = ai[i+1] - ai[i];
4790       if (anzi) {
4791         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4792         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4793         nrows++;
4794       }
4795     }
4796     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4797     k++;
4798     buf_si += len_si[proc];
4799   }
4800 
4801   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4802   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4803 
4804   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4805   for (i=0; i<merge->nrecv; i++) {
4806     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4807   }
4808 
4809   ierr = PetscFree(len_si);CHKERRQ(ierr);
4810   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4811   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4812   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4813   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4814   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4815   ierr = PetscFree(status);CHKERRQ(ierr);
4816 
4817   /* compute a local seq matrix in each processor */
4818   /*----------------------------------------------*/
4819   /* allocate bi array and free space for accumulating nonzero column info */
4820   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4821   bi[0] = 0;
4822 
4823   /* create and initialize a linked list */
4824   nlnk = N+1;
4825   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4826 
4827   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4828   len  = ai[owners[rank+1]] - ai[owners[rank]];
4829   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4830 
4831   current_space = free_space;
4832 
4833   /* determine symbolic info for each local row */
4834   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4835 
4836   for (k=0; k<merge->nrecv; k++) {
4837     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4838     nrows       = *buf_ri_k[k];
4839     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4840     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4841   }
4842 
4843   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4844   len  = 0;
4845   for (i=0; i<m; i++) {
4846     bnzi = 0;
4847     /* add local non-zero cols of this proc's seqmat into lnk */
4848     arow  = owners[rank] + i;
4849     anzi  = ai[arow+1] - ai[arow];
4850     aj    = a->j + ai[arow];
4851     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4852     bnzi += nlnk;
4853     /* add received col data into lnk */
4854     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4855       if (i == *nextrow[k]) { /* i-th row */
4856         anzi  = *(nextai[k]+1) - *nextai[k];
4857         aj    = buf_rj[k] + *nextai[k];
4858         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4859         bnzi += nlnk;
4860         nextrow[k]++; nextai[k]++;
4861       }
4862     }
4863     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4864 
4865     /* if free space is not available, make more free space */
4866     if (current_space->local_remaining<bnzi) {
4867       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4868       nspacedouble++;
4869     }
4870     /* copy data into free space, then initialize lnk */
4871     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4872     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4873 
4874     current_space->array           += bnzi;
4875     current_space->local_used      += bnzi;
4876     current_space->local_remaining -= bnzi;
4877 
4878     bi[i+1] = bi[i] + bnzi;
4879   }
4880 
4881   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4882 
4883   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4884   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4885   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4886 
4887   /* create symbolic parallel matrix B_mpi */
4888   /*---------------------------------------*/
4889   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4890   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4891   if (n==PETSC_DECIDE) {
4892     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4893   } else {
4894     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4895   }
4896   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4897   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4898   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4899   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4900   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4901 
4902   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4903   B_mpi->assembled    = PETSC_FALSE;
4904   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4905   merge->bi           = bi;
4906   merge->bj           = bj;
4907   merge->buf_ri       = buf_ri;
4908   merge->buf_rj       = buf_rj;
4909   merge->coi          = NULL;
4910   merge->coj          = NULL;
4911   merge->owners_co    = NULL;
4912 
4913   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4914 
4915   /* attach the supporting struct to B_mpi for reuse */
4916   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4917   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4918   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4919   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4920   *mpimat = B_mpi;
4921 
4922   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4923   PetscFunctionReturn(0);
4924 }
4925 
4926 /*@C
4927       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4928                  matrices from each processor
4929 
4930     Collective on MPI_Comm
4931 
4932    Input Parameters:
4933 +    comm - the communicators the parallel matrix will live on
4934 .    seqmat - the input sequential matrices
4935 .    m - number of local rows (or PETSC_DECIDE)
4936 .    n - number of local columns (or PETSC_DECIDE)
4937 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4938 
4939    Output Parameter:
4940 .    mpimat - the parallel matrix generated
4941 
4942     Level: advanced
4943 
4944    Notes:
4945      The dimensions of the sequential matrix in each processor MUST be the same.
4946      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4947      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4948 @*/
4949 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4950 {
4951   PetscErrorCode ierr;
4952   PetscMPIInt    size;
4953 
4954   PetscFunctionBegin;
4955   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4956   if (size == 1) {
4957     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4958     if (scall == MAT_INITIAL_MATRIX) {
4959       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4960     } else {
4961       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4962     }
4963     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4964     PetscFunctionReturn(0);
4965   }
4966   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4967   if (scall == MAT_INITIAL_MATRIX) {
4968     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4969   }
4970   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4971   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4972   PetscFunctionReturn(0);
4973 }
4974 
4975 /*@
4976      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4977           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4978           with MatGetSize()
4979 
4980     Not Collective
4981 
4982    Input Parameters:
4983 +    A - the matrix
4984 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4985 
4986    Output Parameter:
4987 .    A_loc - the local sequential matrix generated
4988 
4989     Level: developer
4990 
4991 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
4992 
4993 @*/
4994 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4995 {
4996   PetscErrorCode ierr;
4997   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4998   Mat_SeqAIJ     *mat,*a,*b;
4999   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5000   MatScalar      *aa,*ba,*cam;
5001   PetscScalar    *ca;
5002   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5003   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5004   PetscBool      match;
5005   MPI_Comm       comm;
5006   PetscMPIInt    size;
5007 
5008   PetscFunctionBegin;
5009   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5010   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5011   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5012   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5013   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
5014 
5015   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5016   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5017   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5018   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5019   aa = a->a; ba = b->a;
5020   if (scall == MAT_INITIAL_MATRIX) {
5021     if (size == 1) {
5022       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
5023       PetscFunctionReturn(0);
5024     }
5025 
5026     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5027     ci[0] = 0;
5028     for (i=0; i<am; i++) {
5029       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5030     }
5031     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5032     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5033     k    = 0;
5034     for (i=0; i<am; i++) {
5035       ncols_o = bi[i+1] - bi[i];
5036       ncols_d = ai[i+1] - ai[i];
5037       /* off-diagonal portion of A */
5038       for (jo=0; jo<ncols_o; jo++) {
5039         col = cmap[*bj];
5040         if (col >= cstart) break;
5041         cj[k]   = col; bj++;
5042         ca[k++] = *ba++;
5043       }
5044       /* diagonal portion of A */
5045       for (j=0; j<ncols_d; j++) {
5046         cj[k]   = cstart + *aj++;
5047         ca[k++] = *aa++;
5048       }
5049       /* off-diagonal portion of A */
5050       for (j=jo; j<ncols_o; j++) {
5051         cj[k]   = cmap[*bj++];
5052         ca[k++] = *ba++;
5053       }
5054     }
5055     /* put together the new matrix */
5056     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5057     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5058     /* Since these are PETSc arrays, change flags to free them as necessary. */
5059     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5060     mat->free_a  = PETSC_TRUE;
5061     mat->free_ij = PETSC_TRUE;
5062     mat->nonew   = 0;
5063   } else if (scall == MAT_REUSE_MATRIX) {
5064     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5065     ci = mat->i; cj = mat->j; cam = mat->a;
5066     for (i=0; i<am; i++) {
5067       /* off-diagonal portion of A */
5068       ncols_o = bi[i+1] - bi[i];
5069       for (jo=0; jo<ncols_o; jo++) {
5070         col = cmap[*bj];
5071         if (col >= cstart) break;
5072         *cam++ = *ba++; bj++;
5073       }
5074       /* diagonal portion of A */
5075       ncols_d = ai[i+1] - ai[i];
5076       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5077       /* off-diagonal portion of A */
5078       for (j=jo; j<ncols_o; j++) {
5079         *cam++ = *ba++; bj++;
5080       }
5081     }
5082   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5083   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5084   PetscFunctionReturn(0);
5085 }
5086 
5087 /*@C
5088      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5089 
5090     Not Collective
5091 
5092    Input Parameters:
5093 +    A - the matrix
5094 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5095 -    row, col - index sets of rows and columns to extract (or NULL)
5096 
5097    Output Parameter:
5098 .    A_loc - the local sequential matrix generated
5099 
5100     Level: developer
5101 
5102 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5103 
5104 @*/
5105 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5106 {
5107   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5108   PetscErrorCode ierr;
5109   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5110   IS             isrowa,iscola;
5111   Mat            *aloc;
5112   PetscBool      match;
5113 
5114   PetscFunctionBegin;
5115   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5116   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5117   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5118   if (!row) {
5119     start = A->rmap->rstart; end = A->rmap->rend;
5120     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5121   } else {
5122     isrowa = *row;
5123   }
5124   if (!col) {
5125     start = A->cmap->rstart;
5126     cmap  = a->garray;
5127     nzA   = a->A->cmap->n;
5128     nzB   = a->B->cmap->n;
5129     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5130     ncols = 0;
5131     for (i=0; i<nzB; i++) {
5132       if (cmap[i] < start) idx[ncols++] = cmap[i];
5133       else break;
5134     }
5135     imark = i;
5136     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5137     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5138     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5139   } else {
5140     iscola = *col;
5141   }
5142   if (scall != MAT_INITIAL_MATRIX) {
5143     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5144     aloc[0] = *A_loc;
5145   }
5146   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5147   if (!col) { /* attach global id of condensed columns */
5148     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5149   }
5150   *A_loc = aloc[0];
5151   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5152   if (!row) {
5153     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5154   }
5155   if (!col) {
5156     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5157   }
5158   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5159   PetscFunctionReturn(0);
5160 }
5161 
5162 /*@C
5163     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5164 
5165     Collective on Mat
5166 
5167    Input Parameters:
5168 +    A,B - the matrices in mpiaij format
5169 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5170 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5171 
5172    Output Parameter:
5173 +    rowb, colb - index sets of rows and columns of B to extract
5174 -    B_seq - the sequential matrix generated
5175 
5176     Level: developer
5177 
5178 @*/
5179 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5180 {
5181   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5182   PetscErrorCode ierr;
5183   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5184   IS             isrowb,iscolb;
5185   Mat            *bseq=NULL;
5186 
5187   PetscFunctionBegin;
5188   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5189     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5190   }
5191   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5192 
5193   if (scall == MAT_INITIAL_MATRIX) {
5194     start = A->cmap->rstart;
5195     cmap  = a->garray;
5196     nzA   = a->A->cmap->n;
5197     nzB   = a->B->cmap->n;
5198     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5199     ncols = 0;
5200     for (i=0; i<nzB; i++) {  /* row < local row index */
5201       if (cmap[i] < start) idx[ncols++] = cmap[i];
5202       else break;
5203     }
5204     imark = i;
5205     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5206     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5207     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5208     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5209   } else {
5210     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5211     isrowb  = *rowb; iscolb = *colb;
5212     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5213     bseq[0] = *B_seq;
5214   }
5215   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5216   *B_seq = bseq[0];
5217   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5218   if (!rowb) {
5219     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5220   } else {
5221     *rowb = isrowb;
5222   }
5223   if (!colb) {
5224     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5225   } else {
5226     *colb = iscolb;
5227   }
5228   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5229   PetscFunctionReturn(0);
5230 }
5231 
5232 #include <petsc/private/vecscatterimpl.h>
5233 /*
5234     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5235     of the OFF-DIAGONAL portion of local A
5236 
5237     Collective on Mat
5238 
5239    Input Parameters:
5240 +    A,B - the matrices in mpiaij format
5241 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5242 
5243    Output Parameter:
5244 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5245 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5246 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5247 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5248 
5249     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5250      for this matrix. This is not desirable..
5251 
5252     Level: developer
5253 
5254 */
5255 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5256 {
5257   VecScatter_MPI_General *gen_to,*gen_from;
5258   PetscErrorCode         ierr;
5259   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5260   Mat_SeqAIJ             *b_oth;
5261   VecScatter             ctx;
5262   MPI_Comm               comm;
5263   PetscMPIInt            *rprocs,*sprocs,tag,rank;
5264   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5265   PetscInt               *rvalues,*svalues,*cols,sbs,rbs;
5266   PetscScalar              *b_otha,*bufa,*bufA,*vals;
5267   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5268   MPI_Request            *rwaits = NULL,*swaits = NULL;
5269   MPI_Status             *sstatus,rstatus;
5270   PetscMPIInt            jj,size;
5271   VecScatterType         type;
5272   PetscBool              mpi1;
5273 
5274   PetscFunctionBegin;
5275   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5276   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5277 
5278   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5279     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5280   }
5281   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5282   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5283 
5284   if (size == 1) {
5285     startsj_s = NULL;
5286     bufa_ptr  = NULL;
5287     *B_oth    = NULL;
5288     PetscFunctionReturn(0);
5289   }
5290 
5291   ctx = a->Mvctx;
5292   ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr);
5293   ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr);
5294   if (!mpi1) {
5295     /* a->Mvctx is not type MPI1 which is not implemented for Mat-Mat ops,
5296      thus create a->Mvctx_mpi1 */
5297     if (!a->Mvctx_mpi1) {
5298       a->Mvctx_mpi1_flg = PETSC_TRUE;
5299       ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr);
5300     }
5301     ctx = a->Mvctx_mpi1;
5302   }
5303   tag = ((PetscObject)ctx)->tag;
5304 
5305   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5306   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5307   nrecvs   = gen_from->n;
5308   nsends   = gen_to->n;
5309 
5310   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5311   srow    = gen_to->indices;    /* local row index to be sent */
5312   sstarts = gen_to->starts;
5313   sprocs  = gen_to->procs;
5314   sstatus = gen_to->sstatus;
5315   sbs     = gen_to->bs;
5316   rstarts = gen_from->starts;
5317   rprocs  = gen_from->procs;
5318   rbs     = gen_from->bs;
5319 
5320   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5321   if (scall == MAT_INITIAL_MATRIX) {
5322     /* i-array */
5323     /*---------*/
5324     /*  post receives */
5325     ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
5326     for (i=0; i<nrecvs; i++) {
5327       rowlen = rvalues + rstarts[i]*rbs;
5328       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5329       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5330     }
5331 
5332     /* pack the outgoing message */
5333     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5334 
5335     sstartsj[0] = 0;
5336     rstartsj[0] = 0;
5337     len         = 0; /* total length of j or a array to be sent */
5338     k           = 0;
5339     ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
5340     for (i=0; i<nsends; i++) {
5341       rowlen = svalues + sstarts[i]*sbs;
5342       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5343       for (j=0; j<nrows; j++) {
5344         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5345         for (l=0; l<sbs; l++) {
5346           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5347 
5348           rowlen[j*sbs+l] = ncols;
5349 
5350           len += ncols;
5351           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5352         }
5353         k++;
5354       }
5355       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5356 
5357       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5358     }
5359     /* recvs and sends of i-array are completed */
5360     i = nrecvs;
5361     while (i--) {
5362       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5363     }
5364     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5365     ierr = PetscFree(svalues);CHKERRQ(ierr);
5366 
5367     /* allocate buffers for sending j and a arrays */
5368     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5369     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5370 
5371     /* create i-array of B_oth */
5372     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5373 
5374     b_othi[0] = 0;
5375     len       = 0; /* total length of j or a array to be received */
5376     k         = 0;
5377     for (i=0; i<nrecvs; i++) {
5378       rowlen = rvalues + rstarts[i]*rbs;
5379       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5380       for (j=0; j<nrows; j++) {
5381         b_othi[k+1] = b_othi[k] + rowlen[j];
5382         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5383         k++;
5384       }
5385       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5386     }
5387     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5388 
5389     /* allocate space for j and a arrrays of B_oth */
5390     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5391     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5392 
5393     /* j-array */
5394     /*---------*/
5395     /*  post receives of j-array */
5396     for (i=0; i<nrecvs; i++) {
5397       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5398       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5399     }
5400 
5401     /* pack the outgoing message j-array */
5402     k = 0;
5403     for (i=0; i<nsends; i++) {
5404       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5405       bufJ  = bufj+sstartsj[i];
5406       for (j=0; j<nrows; j++) {
5407         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5408         for (ll=0; ll<sbs; ll++) {
5409           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5410           for (l=0; l<ncols; l++) {
5411             *bufJ++ = cols[l];
5412           }
5413           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5414         }
5415       }
5416       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5417     }
5418 
5419     /* recvs and sends of j-array are completed */
5420     i = nrecvs;
5421     while (i--) {
5422       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5423     }
5424     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5425   } else if (scall == MAT_REUSE_MATRIX) {
5426     sstartsj = *startsj_s;
5427     rstartsj = *startsj_r;
5428     bufa     = *bufa_ptr;
5429     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5430     b_otha   = b_oth->a;
5431   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5432 
5433   /* a-array */
5434   /*---------*/
5435   /*  post receives of a-array */
5436   for (i=0; i<nrecvs; i++) {
5437     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5438     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5439   }
5440 
5441   /* pack the outgoing message a-array */
5442   k = 0;
5443   for (i=0; i<nsends; i++) {
5444     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5445     bufA  = bufa+sstartsj[i];
5446     for (j=0; j<nrows; j++) {
5447       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5448       for (ll=0; ll<sbs; ll++) {
5449         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5450         for (l=0; l<ncols; l++) {
5451           *bufA++ = vals[l];
5452         }
5453         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5454       }
5455     }
5456     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5457   }
5458   /* recvs and sends of a-array are completed */
5459   i = nrecvs;
5460   while (i--) {
5461     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5462   }
5463   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5464   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5465 
5466   if (scall == MAT_INITIAL_MATRIX) {
5467     /* put together the new matrix */
5468     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5469 
5470     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5471     /* Since these are PETSc arrays, change flags to free them as necessary. */
5472     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5473     b_oth->free_a  = PETSC_TRUE;
5474     b_oth->free_ij = PETSC_TRUE;
5475     b_oth->nonew   = 0;
5476 
5477     ierr = PetscFree(bufj);CHKERRQ(ierr);
5478     if (!startsj_s || !bufa_ptr) {
5479       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5480       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5481     } else {
5482       *startsj_s = sstartsj;
5483       *startsj_r = rstartsj;
5484       *bufa_ptr  = bufa;
5485     }
5486   }
5487   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5488   PetscFunctionReturn(0);
5489 }
5490 
5491 /*@C
5492   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5493 
5494   Not Collective
5495 
5496   Input Parameters:
5497 . A - The matrix in mpiaij format
5498 
5499   Output Parameter:
5500 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5501 . colmap - A map from global column index to local index into lvec
5502 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5503 
5504   Level: developer
5505 
5506 @*/
5507 #if defined(PETSC_USE_CTABLE)
5508 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5509 #else
5510 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5511 #endif
5512 {
5513   Mat_MPIAIJ *a;
5514 
5515   PetscFunctionBegin;
5516   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5517   PetscValidPointer(lvec, 2);
5518   PetscValidPointer(colmap, 3);
5519   PetscValidPointer(multScatter, 4);
5520   a = (Mat_MPIAIJ*) A->data;
5521   if (lvec) *lvec = a->lvec;
5522   if (colmap) *colmap = a->colmap;
5523   if (multScatter) *multScatter = a->Mvctx;
5524   PetscFunctionReturn(0);
5525 }
5526 
5527 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5528 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5529 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5530 #if defined(PETSC_HAVE_MKL_SPARSE)
5531 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5532 #endif
5533 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5534 #if defined(PETSC_HAVE_ELEMENTAL)
5535 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5536 #endif
5537 #if defined(PETSC_HAVE_HYPRE)
5538 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5539 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5540 #endif
5541 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5542 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5543 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5544 
5545 /*
5546     Computes (B'*A')' since computing B*A directly is untenable
5547 
5548                n                       p                          p
5549         (              )       (              )         (                  )
5550       m (      A       )  *  n (       B      )   =   m (         C        )
5551         (              )       (              )         (                  )
5552 
5553 */
5554 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5555 {
5556   PetscErrorCode ierr;
5557   Mat            At,Bt,Ct;
5558 
5559   PetscFunctionBegin;
5560   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5561   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5562   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5563   ierr = MatDestroy(&At);CHKERRQ(ierr);
5564   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5565   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5566   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5567   PetscFunctionReturn(0);
5568 }
5569 
5570 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5571 {
5572   PetscErrorCode ierr;
5573   PetscInt       m=A->rmap->n,n=B->cmap->n;
5574   Mat            Cmat;
5575 
5576   PetscFunctionBegin;
5577   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5578   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5579   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5580   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5581   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5582   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5583   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5584   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5585 
5586   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5587 
5588   *C = Cmat;
5589   PetscFunctionReturn(0);
5590 }
5591 
5592 /* ----------------------------------------------------------------*/
5593 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5594 {
5595   PetscErrorCode ierr;
5596 
5597   PetscFunctionBegin;
5598   if (scall == MAT_INITIAL_MATRIX) {
5599     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5600     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5601     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5602   }
5603   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5604   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5605   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5606   PetscFunctionReturn(0);
5607 }
5608 
5609 /*MC
5610    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5611 
5612    Options Database Keys:
5613 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5614 
5615   Level: beginner
5616 
5617 .seealso: MatCreateAIJ()
5618 M*/
5619 
5620 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5621 {
5622   Mat_MPIAIJ     *b;
5623   PetscErrorCode ierr;
5624   PetscMPIInt    size;
5625 
5626   PetscFunctionBegin;
5627   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5628 
5629   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5630   B->data       = (void*)b;
5631   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5632   B->assembled  = PETSC_FALSE;
5633   B->insertmode = NOT_SET_VALUES;
5634   b->size       = size;
5635 
5636   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5637 
5638   /* build cache for off array entries formed */
5639   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5640 
5641   b->donotstash  = PETSC_FALSE;
5642   b->colmap      = 0;
5643   b->garray      = 0;
5644   b->roworiented = PETSC_TRUE;
5645 
5646   /* stuff used for matrix vector multiply */
5647   b->lvec  = NULL;
5648   b->Mvctx = NULL;
5649 
5650   /* stuff for MatGetRow() */
5651   b->rowindices   = 0;
5652   b->rowvalues    = 0;
5653   b->getrowactive = PETSC_FALSE;
5654 
5655   /* flexible pointer used in CUSP/CUSPARSE classes */
5656   b->spptr = NULL;
5657 
5658   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5659   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5660   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5661   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5662   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5663   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5664   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5665   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5666   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5667   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
5668 #if defined(PETSC_HAVE_MKL_SPARSE)
5669   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5670 #endif
5671   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5672   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5673 #if defined(PETSC_HAVE_ELEMENTAL)
5674   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5675 #endif
5676 #if defined(PETSC_HAVE_HYPRE)
5677   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5678 #endif
5679   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
5680   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5681   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5682   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5683   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5684 #if defined(PETSC_HAVE_HYPRE)
5685   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5686 #endif
5687   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
5688   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5689   PetscFunctionReturn(0);
5690 }
5691 
5692 /*@C
5693      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5694          and "off-diagonal" part of the matrix in CSR format.
5695 
5696    Collective on MPI_Comm
5697 
5698    Input Parameters:
5699 +  comm - MPI communicator
5700 .  m - number of local rows (Cannot be PETSC_DECIDE)
5701 .  n - This value should be the same as the local size used in creating the
5702        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5703        calculated if N is given) For square matrices n is almost always m.
5704 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5705 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5706 .   i - row indices for "diagonal" portion of matrix
5707 .   j - column indices
5708 .   a - matrix values
5709 .   oi - row indices for "off-diagonal" portion of matrix
5710 .   oj - column indices
5711 -   oa - matrix values
5712 
5713    Output Parameter:
5714 .   mat - the matrix
5715 
5716    Level: advanced
5717 
5718    Notes:
5719        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5720        must free the arrays once the matrix has been destroyed and not before.
5721 
5722        The i and j indices are 0 based
5723 
5724        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5725 
5726        This sets local rows and cannot be used to set off-processor values.
5727 
5728        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5729        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5730        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5731        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5732        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5733        communication if it is known that only local entries will be set.
5734 
5735 .keywords: matrix, aij, compressed row, sparse, parallel
5736 
5737 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5738           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5739 @*/
5740 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5741 {
5742   PetscErrorCode ierr;
5743   Mat_MPIAIJ     *maij;
5744 
5745   PetscFunctionBegin;
5746   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5747   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5748   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5749   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5750   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5751   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5752   maij = (Mat_MPIAIJ*) (*mat)->data;
5753 
5754   (*mat)->preallocated = PETSC_TRUE;
5755 
5756   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5757   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5758 
5759   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5760   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5761 
5762   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5763   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5764   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5765   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5766 
5767   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5768   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5769   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5770   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5771   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5772   PetscFunctionReturn(0);
5773 }
5774 
5775 /*
5776     Special version for direct calls from Fortran
5777 */
5778 #include <petsc/private/fortranimpl.h>
5779 
5780 /* Change these macros so can be used in void function */
5781 #undef CHKERRQ
5782 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5783 #undef SETERRQ2
5784 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5785 #undef SETERRQ3
5786 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5787 #undef SETERRQ
5788 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5789 
5790 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5791 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5792 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5793 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5794 #else
5795 #endif
5796 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5797 {
5798   Mat            mat  = *mmat;
5799   PetscInt       m    = *mm, n = *mn;
5800   InsertMode     addv = *maddv;
5801   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5802   PetscScalar    value;
5803   PetscErrorCode ierr;
5804 
5805   MatCheckPreallocated(mat,1);
5806   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5807 
5808 #if defined(PETSC_USE_DEBUG)
5809   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5810 #endif
5811   {
5812     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5813     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5814     PetscBool roworiented = aij->roworiented;
5815 
5816     /* Some Variables required in the macro */
5817     Mat        A                 = aij->A;
5818     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5819     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5820     MatScalar  *aa               = a->a;
5821     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5822     Mat        B                 = aij->B;
5823     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5824     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5825     MatScalar  *ba               = b->a;
5826 
5827     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5828     PetscInt  nonew = a->nonew;
5829     MatScalar *ap1,*ap2;
5830 
5831     PetscFunctionBegin;
5832     for (i=0; i<m; i++) {
5833       if (im[i] < 0) continue;
5834 #if defined(PETSC_USE_DEBUG)
5835       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5836 #endif
5837       if (im[i] >= rstart && im[i] < rend) {
5838         row      = im[i] - rstart;
5839         lastcol1 = -1;
5840         rp1      = aj + ai[row];
5841         ap1      = aa + ai[row];
5842         rmax1    = aimax[row];
5843         nrow1    = ailen[row];
5844         low1     = 0;
5845         high1    = nrow1;
5846         lastcol2 = -1;
5847         rp2      = bj + bi[row];
5848         ap2      = ba + bi[row];
5849         rmax2    = bimax[row];
5850         nrow2    = bilen[row];
5851         low2     = 0;
5852         high2    = nrow2;
5853 
5854         for (j=0; j<n; j++) {
5855           if (roworiented) value = v[i*n+j];
5856           else value = v[i+j*m];
5857           if (in[j] >= cstart && in[j] < cend) {
5858             col = in[j] - cstart;
5859             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5860             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5861           } else if (in[j] < 0) continue;
5862 #if defined(PETSC_USE_DEBUG)
5863           /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
5864           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5865 #endif
5866           else {
5867             if (mat->was_assembled) {
5868               if (!aij->colmap) {
5869                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5870               }
5871 #if defined(PETSC_USE_CTABLE)
5872               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5873               col--;
5874 #else
5875               col = aij->colmap[in[j]] - 1;
5876 #endif
5877               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5878               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5879                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5880                 col  =  in[j];
5881                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5882                 B     = aij->B;
5883                 b     = (Mat_SeqAIJ*)B->data;
5884                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5885                 rp2   = bj + bi[row];
5886                 ap2   = ba + bi[row];
5887                 rmax2 = bimax[row];
5888                 nrow2 = bilen[row];
5889                 low2  = 0;
5890                 high2 = nrow2;
5891                 bm    = aij->B->rmap->n;
5892                 ba    = b->a;
5893               }
5894             } else col = in[j];
5895             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5896           }
5897         }
5898       } else if (!aij->donotstash) {
5899         if (roworiented) {
5900           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5901         } else {
5902           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5903         }
5904       }
5905     }
5906   }
5907   PetscFunctionReturnVoid();
5908 }
5909