xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision b4bf0e97a1dcc10d3a59d1c4e86c2d659e0fa4a1)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/isimpl.h>
6 #include <petscblaslapack.h>
7 #include <petscsf.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
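
  For example, a minimal preallocation sketch (illustrative only; comm, nrow, ncol, and the per-row
  nonzero estimates are placeholders). The MatSeqAIJSetPreallocation() call takes effect on a
  single-process communicator and the MatMPIAIJSetPreallocation() call on a multi-process one:
.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrow,ncol);
   MatSetType(A,MATAIJ);
   MatSeqAIJSetPreallocation(A,5,NULL);
   MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);
.ve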
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, and MATAIJCRL. The type also automatically switches over to using inodes when
23    enough of them exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46 
47 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
48 {
49   PetscErrorCode ierr;
50   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
51 
52   PetscFunctionBegin;
53   if (mat->A) {
54     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
55     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
56   }
57   PetscFunctionReturn(0);
58 }
59 
60 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
61 {
62   PetscErrorCode  ierr;
63   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
64   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
65   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
66   const PetscInt  *ia,*ib;
67   const MatScalar *aa,*bb;
68   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
69   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
70 
71   PetscFunctionBegin;
72   *keptrows = 0;
73   ia        = a->i;
74   ib        = b->i;
75   for (i=0; i<m; i++) {
76     na = ia[i+1] - ia[i];
77     nb = ib[i+1] - ib[i];
78     if (!na && !nb) {
79       cnt++;
80       goto ok1;
81     }
82     aa = a->a + ia[i];
83     for (j=0; j<na; j++) {
84       if (aa[j] != 0.0) goto ok1;
85     }
86     bb = b->a + ib[i];
87     for (j=0; j <nb; j++) {
88       if (bb[j] != 0.0) goto ok1;
89     }
90     cnt++;
91 ok1:;
92   }
93   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
94   if (!n0rows) PetscFunctionReturn(0);
95   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
96   cnt  = 0;
97   for (i=0; i<m; i++) {
98     na = ia[i+1] - ia[i];
99     nb = ib[i+1] - ib[i];
100     if (!na && !nb) continue;
101     aa = a->a + ia[i];
102     for (j=0; j<na;j++) {
103       if (aa[j] != 0.0) {
104         rows[cnt++] = rstart + i;
105         goto ok2;
106       }
107     }
108     bb = b->a + ib[i];
109     for (j=0; j<nb; j++) {
110       if (bb[j] != 0.0) {
111         rows[cnt++] = rstart + i;
112         goto ok2;
113       }
114     }
115 ok2:;
116   }
117   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
118   PetscFunctionReturn(0);
119 }
120 
121 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
122 {
123   PetscErrorCode    ierr;
124   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
125   PetscBool         cong;
126 
127   PetscFunctionBegin;
128   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
129   if (Y->assembled && cong) {
130     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
131   } else {
132     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
133   }
134   PetscFunctionReturn(0);
135 }
136 
137 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
138 {
139   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
140   PetscErrorCode ierr;
141   PetscInt       i,rstart,nrows,*rows;
142 
143   PetscFunctionBegin;
144   *zrows = NULL;
145   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
146   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
147   for (i=0; i<nrows; i++) rows[i] += rstart;
148   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
149   PetscFunctionReturn(0);
150 }
151 
152 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
153 {
154   PetscErrorCode ierr;
155   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
156   PetscInt       i,n,*garray = aij->garray;
157   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
158   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
159   PetscReal      *work;
160 
161   PetscFunctionBegin;
162   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
163   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
164   if (type == NORM_2) {
165     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
166       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
167     }
168     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
169       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
170     }
171   } else if (type == NORM_1) {
172     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
173       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
174     }
175     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
176       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
177     }
178   } else if (type == NORM_INFINITY) {
179     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
180       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
181     }
182     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
183       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
184     }
185 
186   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
187   if (type == NORM_INFINITY) {
188     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
189   } else {
190     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
191   }
192   ierr = PetscFree(work);CHKERRQ(ierr);
193   if (type == NORM_2) {
194     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
195   }
196   PetscFunctionReturn(0);
197 }
198 
199 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
200 {
201   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
202   IS              sis,gis;
203   PetscErrorCode  ierr;
204   const PetscInt  *isis,*igis;
205   PetscInt        n,*iis,nsis,ngis,rstart,i;
206 
207   PetscFunctionBegin;
208   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
209   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
210   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
211   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
212   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
213   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
214 
215   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
216   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
217   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
218   n    = ngis + nsis;
219   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
220   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
221   for (i=0; i<n; i++) iis[i] += rstart;
222   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
223 
224   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
225   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
226   ierr = ISDestroy(&sis);CHKERRQ(ierr);
227   ierr = ISDestroy(&gis);CHKERRQ(ierr);
228   PetscFunctionReturn(0);
229 }
230 
231 /*
232     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
233     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
234 
235     Only for square matrices
236 
237     Used by a preconditioner, hence PETSC_EXTERN
238 */
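/*
   Illustrative usage sketch (gseq, mlocal, and dist are placeholder names): gseq is the sequential
   matrix whose entries on rank 0 are distributed, and every rank supplies its desired number of
   local rows mlocal:

     Mat dist;
     MatDistribute_MPIAIJ(comm,gseq,mlocal,MAT_INITIAL_MATRIX,&dist);
     ...
     MatDistribute_MPIAIJ(comm,gseq,mlocal,MAT_REUSE_MATRIX,&dist);

   The second call only moves new numerical values from rank 0 into the already-created dist.
*/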
239 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
240 {
241   PetscMPIInt    rank,size;
242   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
243   PetscErrorCode ierr;
244   Mat            mat;
245   Mat_SeqAIJ     *gmata;
246   PetscMPIInt    tag;
247   MPI_Status     status;
248   PetscBool      aij;
249   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
250 
251   PetscFunctionBegin;
252   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
253   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
254   if (!rank) {
255     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
256     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
257   }
258   if (reuse == MAT_INITIAL_MATRIX) {
259     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
260     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
261     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
262     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
263     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
264     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
265     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
266     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
267     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
268 
269     rowners[0] = 0;
270     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
271     rstart = rowners[rank];
272     rend   = rowners[rank+1];
273     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
274     if (!rank) {
275       gmata = (Mat_SeqAIJ*) gmat->data;
276       /* send row lengths to all processors */
277       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
278       for (i=1; i<size; i++) {
279         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
280       }
281       /* determine the numbers of diagonal and off-diagonal entries */
282       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
283       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
284       jj   = 0;
285       for (i=0; i<m; i++) {
286         for (j=0; j<dlens[i]; j++) {
287           if (gmata->j[jj] < rstart) ld[i]++;
288           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
289           jj++;
290         }
291       }
292       /* send column indices to other processes */
293       for (i=1; i<size; i++) {
294         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
295         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
296         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
297       }
298 
299       /* send numerical values to other processes */
300       for (i=1; i<size; i++) {
301         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
302         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
303       }
304       gmataa = gmata->a;
305       gmataj = gmata->j;
306 
307     } else {
308       /* receive row lengths */
309       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
310       /* receive column indices */
311       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
312       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
313       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
314       /* determine the numbers of diagonal and off-diagonal entries */
315       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
316       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
317       jj   = 0;
318       for (i=0; i<m; i++) {
319         for (j=0; j<dlens[i]; j++) {
320           if (gmataj[jj] < rstart) ld[i]++;
321           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
322           jj++;
323         }
324       }
325       /* receive numerical values */
326       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
327       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
328     }
329     /* set preallocation */
330     for (i=0; i<m; i++) {
331       dlens[i] -= olens[i];
332     }
333     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
334     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
335 
336     for (i=0; i<m; i++) {
337       dlens[i] += olens[i];
338     }
339     cnt = 0;
340     for (i=0; i<m; i++) {
341       row  = rstart + i;
342       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
343       cnt += dlens[i];
344     }
345     if (rank) {
346       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
347     }
348     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
349     ierr = PetscFree(rowners);CHKERRQ(ierr);
350 
351     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
352 
353     *inmat = mat;
354   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
355     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
356     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
357     mat  = *inmat;
358     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
359     if (!rank) {
360       /* send numerical values to other processes */
361       gmata  = (Mat_SeqAIJ*) gmat->data;
362       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
363       gmataa = gmata->a;
364       for (i=1; i<size; i++) {
365         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
366         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
367       }
368       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
369     } else {
370       /* receive numerical values from process 0*/
371       nz   = Ad->nz + Ao->nz;
372       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
373       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
374     }
375     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
376     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
377     ad = Ad->a;
378     ao = Ao->a;
379     if (mat->rmap->n) {
380       i  = 0;
381       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
382       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
383     }
384     for (i=1; i<mat->rmap->n; i++) {
385       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
386       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
387     }
388     i--;
389     if (mat->rmap->n) {
390       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
391     }
392     if (rank) {
393       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
394     }
395   }
396   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
397   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
398   PetscFunctionReturn(0);
399 }
400 
401 /*
402   Local utility routine that creates a mapping from the global column
403 number to the local number in the off-diagonal part of the local
404 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
405 a slightly higher hash table cost; without it, it is not scalable (each processor
406 has an order-N integer array) but is fast to access.
407 */
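/*
   Lookup sketch: once the colmap exists, a global column gcol (a placeholder name) is translated to
   the matching local column index lcol of the off-diagonal part, with a negative result meaning the
   column is not present locally, roughly as follows:

   #if defined(PETSC_USE_CTABLE)
     PetscTableFind(aij->colmap,gcol+1,&lcol); lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/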
408 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
409 {
410   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
411   PetscErrorCode ierr;
412   PetscInt       n = aij->B->cmap->n,i;
413 
414   PetscFunctionBegin;
415   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
416 #if defined(PETSC_USE_CTABLE)
417   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
418   for (i=0; i<n; i++) {
419     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
420   }
421 #else
422   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
423   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
424   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
425 #endif
426   PetscFunctionReturn(0);
427 }
428 
429 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
430 { \
431     if (col <= lastcol1)  low1 = 0;     \
432     else                 high1 = nrow1; \
433     lastcol1 = col;\
434     while (high1-low1 > 5) { \
435       t = (low1+high1)/2; \
436       if (rp1[t] > col) high1 = t; \
437       else              low1  = t; \
438     } \
439       for (_i=low1; _i<high1; _i++) { \
440         if (rp1[_i] > col) break; \
441         if (rp1[_i] == col) { \
442           if (addv == ADD_VALUES) ap1[_i] += value;   \
443           else                    ap1[_i] = value; \
444           goto a_noinsert; \
445         } \
446       }  \
447       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
448       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
449       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
450       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
451       N = nrow1++ - 1; a->nz++; high1++; \
452       /* shift up all the later entries in this row */ \
453       for (ii=N; ii>=_i; ii--) { \
454         rp1[ii+1] = rp1[ii]; \
455         ap1[ii+1] = ap1[ii]; \
456       } \
457       rp1[_i] = col;  \
458       ap1[_i] = value;  \
459       A->nonzerostate++;\
460       a_noinsert: ; \
461       ailen[row] = nrow1; \
462 }
463 
464 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
465   { \
466     if (col <= lastcol2) low2 = 0;                        \
467     else high2 = nrow2;                                   \
468     lastcol2 = col;                                       \
469     while (high2-low2 > 5) {                              \
470       t = (low2+high2)/2;                                 \
471       if (rp2[t] > col) high2 = t;                        \
472       else             low2  = t;                         \
473     }                                                     \
474     for (_i=low2; _i<high2; _i++) {                       \
475       if (rp2[_i] > col) break;                           \
476       if (rp2[_i] == col) {                               \
477         if (addv == ADD_VALUES) ap2[_i] += value;         \
478         else                    ap2[_i] = value;          \
479         goto b_noinsert;                                  \
480       }                                                   \
481     }                                                     \
482     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
483     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
484     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
485     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
486     N = nrow2++ - 1; b->nz++; high2++;                    \
487     /* shift up all the later entries in this row */      \
488     for (ii=N; ii>=_i; ii--) {                            \
489       rp2[ii+1] = rp2[ii];                                \
490       ap2[ii+1] = ap2[ii];                                \
491     }                                                     \
492     rp2[_i] = col;                                        \
493     ap2[_i] = value;                                      \
494     B->nonzerostate++;                                    \
495     b_noinsert: ;                                         \
496     bilen[row] = nrow2;                                   \
497   }
498 
499 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
500 {
501   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
502   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
503   PetscErrorCode ierr;
504   PetscInt       l,*garray = mat->garray,diag;
505 
506   PetscFunctionBegin;
507   /* code only works for square matrices A */
508 
509   /* find size of row to the left of the diagonal part */
510   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
511   row  = row - diag;
512   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
513     if (garray[b->j[b->i[row]+l]] > diag) break;
514   }
515   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
516 
517   /* diagonal part */
518   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
519 
520   /* right of diagonal part */
521   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
522   PetscFunctionReturn(0);
523 }
524 
525 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
526 {
527   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
528   PetscScalar    value;
529   PetscErrorCode ierr;
530   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
531   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
532   PetscBool      roworiented = aij->roworiented;
533 
534   /* Some Variables required in the macro */
535   Mat        A                 = aij->A;
536   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
537   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
538   MatScalar  *aa               = a->a;
539   PetscBool  ignorezeroentries = a->ignorezeroentries;
540   Mat        B                 = aij->B;
541   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
542   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
543   MatScalar  *ba               = b->a;
544 
545   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
546   PetscInt  nonew;
547   MatScalar *ap1,*ap2;
548 
549   PetscFunctionBegin;
550   for (i=0; i<m; i++) {
551     if (im[i] < 0) continue;
552 #if defined(PETSC_USE_DEBUG)
553     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
554 #endif
555     if (im[i] >= rstart && im[i] < rend) {
556       row      = im[i] - rstart;
557       lastcol1 = -1;
558       rp1      = aj + ai[row];
559       ap1      = aa + ai[row];
560       rmax1    = aimax[row];
561       nrow1    = ailen[row];
562       low1     = 0;
563       high1    = nrow1;
564       lastcol2 = -1;
565       rp2      = bj + bi[row];
566       ap2      = ba + bi[row];
567       rmax2    = bimax[row];
568       nrow2    = bilen[row];
569       low2     = 0;
570       high2    = nrow2;
571 
572       for (j=0; j<n; j++) {
573         if (roworiented) value = v[i*n+j];
574         else             value = v[i+j*m];
575         if (in[j] >= cstart && in[j] < cend) {
576           col   = in[j] - cstart;
577           nonew = a->nonew;
578           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
579           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
580         } else if (in[j] < 0) continue;
581 #if defined(PETSC_USE_DEBUG)
582         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
583 #endif
584         else {
585           if (mat->was_assembled) {
586             if (!aij->colmap) {
587               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
588             }
589 #if defined(PETSC_USE_CTABLE)
590             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
591             col--;
592 #else
593             col = aij->colmap[in[j]] - 1;
594 #endif
595             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
596               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
597               col  =  in[j];
598               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
599               B     = aij->B;
600               b     = (Mat_SeqAIJ*)B->data;
601               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
602               rp2   = bj + bi[row];
603               ap2   = ba + bi[row];
604               rmax2 = bimax[row];
605               nrow2 = bilen[row];
606               low2  = 0;
607               high2 = nrow2;
608               bm    = aij->B->rmap->n;
609               ba    = b->a;
610             } else if (col < 0) {
611               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
612                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
613               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
614             }
615           } else col = in[j];
616           nonew = b->nonew;
617           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
618         }
619       }
620     } else {
621       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
622       if (!aij->donotstash) {
623         mat->assembled = PETSC_FALSE;
624         if (roworiented) {
625           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
626         } else {
627           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
628         }
629       }
630     }
631   }
632   PetscFunctionReturn(0);
633 }
634 
635 /*
636     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
637     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
638     No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
639 */
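/*
   Input sketch (illustrative): for a process owning two rows whose global columns are
   {cstart, cstart+3} in the first row and {0, cstart+1} in the second, the expected input is

     mat_i = {0, 2, 4}
     mat_j = {cstart, cstart+3, 0, cstart+1}

   i.e. local CSR row pointers with global, per-row sorted column indices.
*/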
640 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
641 {
642   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
643   Mat            A           = aij->A; /* diagonal part of the matrix */
644   Mat            B           = aij->B; /* offdiagonal part of the matrix */
645   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
646   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
647   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
648   PetscInt       *ailen      = a->ilen,*aj = a->j;
649   PetscInt       *bilen      = b->ilen,*bj = b->j;
650   PetscInt       am          = aij->A->rmap->n,j;
651   PetscInt       diag_so_far = 0,dnz;
652   PetscInt       offd_so_far = 0,onz;
653 
654   PetscFunctionBegin;
655   /* Iterate over all rows of the matrix */
656   for (j=0; j<am; j++) {
657     dnz = onz = 0;
658     /*  Iterate over all non-zero columns of the current row */
659     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
660       /* If column is in the diagonal */
661       if (mat_j[col] >= cstart && mat_j[col] < cend) {
662         aj[diag_so_far++] = mat_j[col] - cstart;
663         dnz++;
664       } else { /* off-diagonal entries */
665         bj[offd_so_far++] = mat_j[col];
666         onz++;
667       }
668     }
669     ailen[j] = dnz;
670     bilen[j] = onz;
671   }
672   PetscFunctionReturn(0);
673 }
674 
675 /*
676     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
677     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
678     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
679     Also, mat->was_assembled has to be PETSC_FALSE, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
680     would not be correct and the more complex MatSetValues_MPIAIJ has to be used.
681 */
682 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
683 {
684   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
685   Mat            A      = aij->A; /* diagonal part of the matrix */
686   Mat            B      = aij->B; /* offdiagonal part of the matrix */
687   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
688   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
689   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
690   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
691   PetscInt       *ailen = a->ilen,*aj = a->j;
692   PetscInt       *bilen = b->ilen,*bj = b->j;
693   PetscInt       am     = aij->A->rmap->n,j;
694   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
695   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
696   PetscScalar    *aa = a->a,*ba = b->a;
697 
698   PetscFunctionBegin;
699   /* Iterate over all rows of the matrix */
700   for (j=0; j<am; j++) {
701     dnz_row = onz_row = 0;
702     rowstart_offd = full_offd_i[j];
703     rowstart_diag = full_diag_i[j];
704     /*  Iterate over all non-zero columns of the current row */
705     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
706       /* If column is in the diagonal */
707       if (mat_j[col] >= cstart && mat_j[col] < cend) {
708         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
709         aa[rowstart_diag+dnz_row] = mat_a[col];
710         dnz_row++;
711       } else { /* off-diagonal entries */
712         bj[rowstart_offd+onz_row] = mat_j[col];
713         ba[rowstart_offd+onz_row] = mat_a[col];
714         onz_row++;
715       }
716     }
717     ailen[j] = dnz_row;
718     bilen[j] = onz_row;
719   }
720   PetscFunctionReturn(0);
721 }
722 
723 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
724 {
725   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
726   PetscErrorCode ierr;
727   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
728   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
729 
730   PetscFunctionBegin;
731   for (i=0; i<m; i++) {
732     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
733     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
734     if (idxm[i] >= rstart && idxm[i] < rend) {
735       row = idxm[i] - rstart;
736       for (j=0; j<n; j++) {
737         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
738         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
739         if (idxn[j] >= cstart && idxn[j] < cend) {
740           col  = idxn[j] - cstart;
741           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
742         } else {
743           if (!aij->colmap) {
744             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
745           }
746 #if defined(PETSC_USE_CTABLE)
747           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
748           col--;
749 #else
750           col = aij->colmap[idxn[j]] - 1;
751 #endif
752           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
753           else {
754             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
755           }
756         }
757       }
758     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
759   }
760   PetscFunctionReturn(0);
761 }
762 
763 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
764 
765 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
766 {
767   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
768   PetscErrorCode ierr;
769   PetscInt       nstash,reallocs;
770 
771   PetscFunctionBegin;
772   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
773 
774   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
775   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
776   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
777   PetscFunctionReturn(0);
778 }
779 
780 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
781 {
782   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
783   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
784   PetscErrorCode ierr;
785   PetscMPIInt    n;
786   PetscInt       i,j,rstart,ncols,flg;
787   PetscInt       *row,*col;
788   PetscBool      other_disassembled;
789   PetscScalar    *val;
790 
791   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
792 
793   PetscFunctionBegin;
794   if (!aij->donotstash && !mat->nooffprocentries) {
795     while (1) {
796       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
797       if (!flg) break;
798 
799       for (i=0; i<n; ) {
800         /* Now identify the consecutive vals belonging to the same row */
801         for (j=i,rstart=row[j]; j<n; j++) {
802           if (row[j] != rstart) break;
803         }
804         if (j < n) ncols = j-i;
805         else       ncols = n-i;
806         /* Now assemble all these values with a single function call */
807         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
808 
809         i = j;
810       }
811     }
812     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
813   }
814   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
815   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
816 
817   /* determine if any processor has disassembled; if so we must
818      also disassemble ourselves, in order that we may reassemble. */
819   /*
820      if nonzero structure of submatrix B cannot change then we know that
821      no processor disassembled thus we can skip this stuff
822   */
823   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
824     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
825     if (mat->was_assembled && !other_disassembled) {
826       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
827     }
828   }
829   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
830     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
831   }
832   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
833   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
834   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
835 
836   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
837 
838   aij->rowvalues = 0;
839 
840   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
841   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
842 
843   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
844   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
845     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
846     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
847   }
848   PetscFunctionReturn(0);
849 }
850 
851 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
852 {
853   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
854   PetscErrorCode ierr;
855 
856   PetscFunctionBegin;
857   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
858   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
859   PetscFunctionReturn(0);
860 }
861 
862 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
863 {
864   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
865   PetscInt      *lrows;
866   PetscInt       r, len;
867   PetscBool      cong;
868   PetscErrorCode ierr;
869 
870   PetscFunctionBegin;
871   /* get locally owned rows */
872   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
873   /* fix right hand side if needed */
874   if (x && b) {
875     const PetscScalar *xx;
876     PetscScalar       *bb;
877 
878     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
879     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
880     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
881     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
882     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
883   }
884   /* Must zero mat->B before mat->A because the (diag) case below may put values into mat->B */
885   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
886   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
887   if ((diag != 0.0) && cong) {
888     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
889   } else if (diag != 0.0) {
890     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
891     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
892     for (r = 0; r < len; ++r) {
893       const PetscInt row = lrows[r] + A->rmap->rstart;
894       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
895     }
896     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
897     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
898   } else {
899     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
900   }
901   ierr = PetscFree(lrows);CHKERRQ(ierr);
902 
903   /* only change matrix nonzero state if pattern was allowed to be changed */
904   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
905     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
906     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
907   }
908   PetscFunctionReturn(0);
909 }
910 
911 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
912 {
913   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
914   PetscErrorCode    ierr;
915   PetscMPIInt       n = A->rmap->n;
916   PetscInt          i,j,r,m,p = 0,len = 0;
917   PetscInt          *lrows,*owners = A->rmap->range;
918   PetscSFNode       *rrows;
919   PetscSF           sf;
920   const PetscScalar *xx;
921   PetscScalar       *bb,*mask;
922   Vec               xmask,lmask;
923   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
924   const PetscInt    *aj, *ii,*ridx;
925   PetscScalar       *aa;
926 
927   PetscFunctionBegin;
928   /* Create SF where leaves are input rows and roots are owned rows */
929   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
930   for (r = 0; r < n; ++r) lrows[r] = -1;
931   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
932   for (r = 0; r < N; ++r) {
933     const PetscInt idx   = rows[r];
934     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
935     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
936       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
937     }
938     rrows[r].rank  = p;
939     rrows[r].index = rows[r] - owners[p];
940   }
941   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
942   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
943   /* Collect flags for rows to be zeroed */
944   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
945   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
946   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
947   /* Compress and put in row numbers */
948   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
949   /* zero diagonal part of matrix */
950   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
951   /* handle off diagonal part of matrix */
952   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
953   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
954   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
955   for (i=0; i<len; i++) bb[lrows[i]] = 1;
956   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
957   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
958   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
959   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
960   if (x) {
961     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
962     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
963     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
964     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
965   }
966   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
967   /* remove zeroed rows of off diagonal matrix */
968   ii = aij->i;
969   for (i=0; i<len; i++) {
970     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
971   }
972   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
973   if (aij->compressedrow.use) {
974     m    = aij->compressedrow.nrows;
975     ii   = aij->compressedrow.i;
976     ridx = aij->compressedrow.rindex;
977     for (i=0; i<m; i++) {
978       n  = ii[i+1] - ii[i];
979       aj = aij->j + ii[i];
980       aa = aij->a + ii[i];
981 
982       for (j=0; j<n; j++) {
983         if (PetscAbsScalar(mask[*aj])) {
984           if (b) bb[*ridx] -= *aa*xx[*aj];
985           *aa = 0.0;
986         }
987         aa++;
988         aj++;
989       }
990       ridx++;
991     }
992   } else { /* do not use compressed row format */
993     m = l->B->rmap->n;
994     for (i=0; i<m; i++) {
995       n  = ii[i+1] - ii[i];
996       aj = aij->j + ii[i];
997       aa = aij->a + ii[i];
998       for (j=0; j<n; j++) {
999         if (PetscAbsScalar(mask[*aj])) {
1000           if (b) bb[i] -= *aa*xx[*aj];
1001           *aa = 0.0;
1002         }
1003         aa++;
1004         aj++;
1005       }
1006     }
1007   }
1008   if (x) {
1009     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1010     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1011   }
1012   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1013   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1014   ierr = PetscFree(lrows);CHKERRQ(ierr);
1015 
1016   /* only change matrix nonzero state if pattern was allowed to be changed */
1017   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1018     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1019     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1020   }
1021   PetscFunctionReturn(0);
1022 }
1023 
1024 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1025 {
1026   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1027   PetscErrorCode ierr;
1028   PetscInt       nt;
1029   VecScatter     Mvctx = a->Mvctx;
1030 
1031   PetscFunctionBegin;
1032   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1033   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1034 
1035   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1036   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1037   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1038   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1039   PetscFunctionReturn(0);
1040 }
1041 
1042 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1043 {
1044   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1045   PetscErrorCode ierr;
1046 
1047   PetscFunctionBegin;
1048   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1049   PetscFunctionReturn(0);
1050 }
1051 
1052 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1053 {
1054   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1055   PetscErrorCode ierr;
1056   VecScatter     Mvctx = a->Mvctx;
1057 
1058   PetscFunctionBegin;
1059   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1060   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1061   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1062   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1063   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1064   PetscFunctionReturn(0);
1065 }
1066 
1067 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1068 {
1069   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1070   PetscErrorCode ierr;
1071   PetscBool      merged;
1072 
1073   PetscFunctionBegin;
1074   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1075   /* do nondiagonal part */
1076   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1077   if (!merged) {
1078     /* send it on its way */
1079     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1080     /* do local part */
1081     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1082     /* receive remote parts: note this assumes the values are not actually */
1083     /* added into yy until the next line */
1084     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1085   } else {
1086     /* do local part */
1087     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1088     /* send it on its way */
1089     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1090     /* values actually were received in the Begin() but we need to call this nop */
1091     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1092   }
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1097 {
1098   MPI_Comm       comm;
1099   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1100   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1101   IS             Me,Notme;
1102   PetscErrorCode ierr;
1103   PetscInt       M,N,first,last,*notme,i;
1104   PetscMPIInt    size;
1105 
1106   PetscFunctionBegin;
1107   /* Easy test: symmetric diagonal block */
1108   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1109   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1110   if (!*f) PetscFunctionReturn(0);
1111   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1112   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1113   if (size == 1) PetscFunctionReturn(0);
1114 
1115   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1116   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1117   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1118   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1119   for (i=0; i<first; i++) notme[i] = i;
1120   for (i=last; i<M; i++) notme[i-last+first] = i;
1121   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1122   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1123   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1124   Aoff = Aoffs[0];
1125   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1126   Boff = Boffs[0];
1127   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1128   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1129   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1130   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1131   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1132   ierr = PetscFree(notme);CHKERRQ(ierr);
1133   PetscFunctionReturn(0);
1134 }
1135 
1136 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1137 {
1138   PetscErrorCode ierr;
1139 
1140   PetscFunctionBegin;
1141   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1146 {
1147   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1148   PetscErrorCode ierr;
1149 
1150   PetscFunctionBegin;
1151   /* do nondiagonal part */
1152   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1153   /* send it on its way */
1154   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1155   /* do local part */
1156   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1157   /* receive remote parts */
1158   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1159   PetscFunctionReturn(0);
1160 }
1161 
1162 /*
1163   This only works correctly for square matrices where the subblock A->A is the
1164    diagonal block
1165 */
1166 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1167 {
1168   PetscErrorCode ierr;
1169   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1170 
1171   PetscFunctionBegin;
1172   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1173   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1174   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1175   PetscFunctionReturn(0);
1176 }
1177 
1178 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1179 {
1180   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1181   PetscErrorCode ierr;
1182 
1183   PetscFunctionBegin;
1184   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1185   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1186   PetscFunctionReturn(0);
1187 }
1188 
1189 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1190 {
1191   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1192   PetscErrorCode ierr;
1193 
1194   PetscFunctionBegin;
1195 #if defined(PETSC_USE_LOG)
1196   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1197 #endif
1198   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1199   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1200   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1201   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1202 #if defined(PETSC_USE_CTABLE)
1203   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1204 #else
1205   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1206 #endif
1207   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1208   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1209   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1210   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1211   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1212   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1213   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1214 
1215   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1216   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1217   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1218   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1219   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1220   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1221   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1222   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1223   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1224 #if defined(PETSC_HAVE_ELEMENTAL)
1225   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1226 #endif
1227 #if defined(PETSC_HAVE_HYPRE)
1228   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1229   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1230 #endif
1231   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1232   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
1233   PetscFunctionReturn(0);
1234 }
1235 
1236 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1237 {
1238   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1239   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1240   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1241   PetscErrorCode ierr;
1242   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1243   int            fd;
1244   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1245   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1246   PetscScalar    *column_values;
1247   PetscInt       message_count,flowcontrolcount;
1248   FILE           *file;
1249 
1250   PetscFunctionBegin;
1251   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1252   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1253   nz   = A->nz + B->nz;
1254   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1255   if (!rank) {
1256     header[0] = MAT_FILE_CLASSID;
1257     header[1] = mat->rmap->N;
1258     header[2] = mat->cmap->N;
1259 
1260     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1261     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1262     /* get largest number of rows any processor has */
1263     rlen  = mat->rmap->n;
1264     range = mat->rmap->range;
1265     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1266   } else {
1267     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1268     rlen = mat->rmap->n;
1269   }
1270 
1271   /* load up the local row counts */
1272   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1273   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1274 
1275   /* store the row lengths to the file */
1276   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1277   if (!rank) {
1278     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1279     for (i=1; i<size; i++) {
1280       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1281       rlen = range[i+1] - range[i];
1282       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1283       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1284     }
1285     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1286   } else {
1287     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1288     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1289     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1290   }
1291   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1292 
1293   /* load up the local column indices */
1294   nzmax = nz; /* process 0 needs as much space as the process with the most nonzeros */
1295   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1296   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1297   cnt   = 0;
1298   for (i=0; i<mat->rmap->n; i++) {
1299     for (j=B->i[i]; j<B->i[i+1]; j++) {
1300       if ((col = garray[B->j[j]]) > cstart) break;
1301       column_indices[cnt++] = col;
1302     }
1303     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1304     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1305   }
1306   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1307 
1308   /* store the column indices to the file */
1309   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1310   if (!rank) {
1311     MPI_Status status;
1312     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1313     for (i=1; i<size; i++) {
1314       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1315       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1316       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1317       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1318       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1319     }
1320     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1321   } else {
1322     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1323     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1324     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1325     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1326   }
1327   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1328 
1329   /* load up the local column values */
1330   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1331   cnt  = 0;
1332   for (i=0; i<mat->rmap->n; i++) {
1333     for (j=B->i[i]; j<B->i[i+1]; j++) {
1334       if (garray[B->j[j]] > cstart) break;
1335       column_values[cnt++] = B->a[j];
1336     }
1337     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1338     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1339   }
1340   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1341 
1342   /* store the column values to the file */
1343   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1344   if (!rank) {
1345     MPI_Status status;
1346     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1347     for (i=1; i<size; i++) {
1348       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1349       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1350       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1351       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1352       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1353     }
1354     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1355   } else {
1356     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1357     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1358     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1359     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1360   }
1361   ierr = PetscFree(column_values);CHKERRQ(ierr);
1362 
1363   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1364   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1365   PetscFunctionReturn(0);
1366 }
1367 
1368 #include <petscdraw.h>
1369 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1370 {
1371   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1372   PetscErrorCode    ierr;
1373   PetscMPIInt       rank = aij->rank,size = aij->size;
1374   PetscBool         isdraw,iascii,isbinary;
1375   PetscViewer       sviewer;
1376   PetscViewerFormat format;
1377 
1378   PetscFunctionBegin;
1379   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1380   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1381   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1382   if (iascii) {
1383     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1384     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1385       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1386       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1387       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1388       for (i=0; i<(PetscInt)size; i++) {
1389         nmax = PetscMax(nmax,nz[i]);
1390         nmin = PetscMin(nmin,nz[i]);
1391         navg += nz[i];
1392       }
1393       ierr = PetscFree(nz);CHKERRQ(ierr);
1394       navg = navg/size;
1395       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1396       PetscFunctionReturn(0);
1397     }
1398     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1399     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1400       MatInfo   info;
1401       PetscInt  *inodes=NULL;
1402 
1403       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1404       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1405       ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr);
1406       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1407       if (!inodes) {
1408         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1409                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1410       } else {
1411         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1412                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1413       }
1414       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1415       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1416       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1417       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1418       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1419       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1420       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1421       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1422       PetscFunctionReturn(0);
1423     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1424       PetscInt inodecount,inodelimit,*inodes;
1425       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1426       if (inodes) {
1427         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1428       } else {
1429         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1430       }
1431       PetscFunctionReturn(0);
1432     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1433       PetscFunctionReturn(0);
1434     }
1435   } else if (isbinary) {
1436     if (size == 1) {
1437       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1438       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1439     } else {
1440       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1441     }
1442     PetscFunctionReturn(0);
1443   } else if (isdraw) {
1444     PetscDraw draw;
1445     PetscBool isnull;
1446     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1447     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1448     if (isnull) PetscFunctionReturn(0);
1449   }
1450 
1451   {
1452     /* assemble the entire matrix onto first processor. */
1453     Mat        A;
1454     Mat_SeqAIJ *Aloc;
1455     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1456     MatScalar  *a;
1457 
1458     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1459     if (!rank) {
1460       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1461     } else {
1462       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1463     }
1464     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1465     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1466     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1467     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1468     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1469 
1470     /* copy over the A part */
1471     Aloc = (Mat_SeqAIJ*)aij->A->data;
1472     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1473     row  = mat->rmap->rstart;
1474     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1475     for (i=0; i<m; i++) {
1476       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1477       row++;
1478       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1479     }
1480     aj = Aloc->j;
1481     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1482 
1483     /* copy over the B part */
1484     Aloc = (Mat_SeqAIJ*)aij->B->data;
1485     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1486     row  = mat->rmap->rstart;
1487     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1488     ct   = cols;
1489     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1490     for (i=0; i<m; i++) {
1491       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1492       row++;
1493       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1494     }
1495     ierr = PetscFree(ct);CHKERRQ(ierr);
1496     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1497     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1498     /*
1499        Every process has to participate in the viewing call since the graphics waits are
1500        synchronized across all processes that share the PetscDraw object
1501     */
1502     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1503     if (!rank) {
1504       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1505       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1506     }
1507     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1508     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1509     ierr = MatDestroy(&A);CHKERRQ(ierr);
1510   }
1511   PetscFunctionReturn(0);
1512 }
1513 
1514 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1515 {
1516   PetscErrorCode ierr;
1517   PetscBool      iascii,isdraw,issocket,isbinary;
1518 
1519   PetscFunctionBegin;
1520   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1521   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1522   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1523   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1524   if (iascii || isdraw || isbinary || issocket) {
1525     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1526   }
1527   PetscFunctionReturn(0);
1528 }
1529 
1530 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1531 {
1532   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1533   PetscErrorCode ierr;
1534   Vec            bb1 = 0;
1535   PetscBool      hasop;
1536 
1537   PetscFunctionBegin;
1538   if (flag == SOR_APPLY_UPPER) {
1539     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1540     PetscFunctionReturn(0);
1541   }
1542 
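  /* SOR_ZERO_INITIAL_GUESS is a single bit, so (~flag & SOR_ZERO_INITIAL_GUESS) is nonzero exactly
     when a zero initial guess was NOT requested; in that case, for multiple iterations, or for
     Eisenstat, a work vector bb1 is needed to hold the locally updated right-hand side */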
1543   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1544     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1545   }
1546 
1547   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1548     if (flag & SOR_ZERO_INITIAL_GUESS) {
1549       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1550       its--;
1551     }
1552 
1553     while (its--) {
1554       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1555       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1556 
1557       /* update rhs: bb1 = bb - B*x */
1558       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1559       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1560 
1561       /* local sweep */
1562       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1563     }
1564   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1565     if (flag & SOR_ZERO_INITIAL_GUESS) {
1566       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1567       its--;
1568     }
1569     while (its--) {
1570       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1571       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1572 
1573       /* update rhs: bb1 = bb - B*x */
1574       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1575       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1576 
1577       /* local sweep */
1578       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1579     }
1580   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1581     if (flag & SOR_ZERO_INITIAL_GUESS) {
1582       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1583       its--;
1584     }
1585     while (its--) {
1586       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1587       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1588 
1589       /* update rhs: bb1 = bb - B*x */
1590       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1591       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1592 
1593       /* local sweep */
1594       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1595     }
1596   } else if (flag & SOR_EISENSTAT) {
1597     Vec xx1;
1598 
1599     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1600     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1601 
1602     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1603     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1604     if (!mat->diag) {
1605       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1606       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1607     }
1608     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1609     if (hasop) {
1610       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1611     } else {
1612       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1613     }
1614     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1615 
1616     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1617 
1618     /* local sweep */
1619     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1620     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1621     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1622   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1623 
1624   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1625 
1626   matin->factorerrortype = mat->A->factorerrortype;
1627   PetscFunctionReturn(0);
1628 }
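/* MatSOR_MPIAIJ() is normally reached through the SOR preconditioner; for example (assuming the
   usual PCSOR options) -pc_type sor -pc_sor_local_symmetric selects the local symmetric sweep
   handled above.  Only the processor-local sweep variants and Eisenstat's trick are supported
   for MPIAIJ matrices; a true parallel sweep raises the error at the end of the routine. */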
1629 
1630 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1631 {
1632   Mat            aA,aB,Aperm;
1633   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1634   PetscScalar    *aa,*ba;
1635   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1636   PetscSF        rowsf,sf;
1637   IS             parcolp = NULL;
1638   PetscBool      done;
1639   PetscErrorCode ierr;
1640 
1641   PetscFunctionBegin;
1642   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1643   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1644   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1645   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1646 
1647   /* Invert row permutation to find out where my rows should go */
1648   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1649   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1650   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1651   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1652   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1653   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1654 
1655   /* Invert column permutation to find out where my columns should go */
1656   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1657   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1658   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1659   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1660   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1661   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1662   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1663 
1664   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1665   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1666   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1667 
1668   /* Find out where my gcols should go */
1669   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1670   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1671   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1672   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1673   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1674   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1675   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1676   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1677 
1678   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1679   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1680   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1681   for (i=0; i<m; i++) {
1682     PetscInt row = rdest[i],rowner;
1683     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1684     for (j=ai[i]; j<ai[i+1]; j++) {
1685       PetscInt cowner,col = cdest[aj[j]];
1686       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1687       if (rowner == cowner) dnnz[i]++;
1688       else onnz[i]++;
1689     }
1690     for (j=bi[i]; j<bi[i+1]; j++) {
1691       PetscInt cowner,col = gcdest[bj[j]];
1692       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1693       if (rowner == cowner) dnnz[i]++;
1694       else onnz[i]++;
1695     }
1696   }
1697   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1698   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1699   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1700   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1701   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1702 
1703   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1704   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1705   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1706   for (i=0; i<m; i++) {
1707     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1708     PetscInt j0,rowlen;
1709     rowlen = ai[i+1] - ai[i];
1710     for (j0=j=0; j<rowlen; j0=j) { /* rowlen can exceed m (the size of the scratch arrays), so insert the row in batches of at most m entries */
1711       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1712       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1713     }
1714     rowlen = bi[i+1] - bi[i];
1715     for (j0=j=0; j<rowlen; j0=j) {
1716       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1717       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1718     }
1719   }
1720   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1721   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1722   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1723   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1724   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1725   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1726   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1727   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1728   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1729   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1730   *B = Aperm;
1731   PetscFunctionReturn(0);
1732 }
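/* Illustrative call pattern for MatPermute() (rowp and colp are index sets describing the desired
   row and column permutations, each process supplying its local portion):
       ierr = MatPermute(A,rowp,colp,&Aperm);CHKERRQ(ierr);
       ... use Aperm ...
       ierr = MatDestroy(&Aperm);CHKERRQ(ierr);
   How rowp/colp are constructed (for example from an ordering routine) is application dependent. */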
1733 
1734 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1735 {
1736   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1737   PetscErrorCode ierr;
1738 
1739   PetscFunctionBegin;
1740   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1741   if (ghosts) *ghosts = aij->garray;
1742   PetscFunctionReturn(0);
1743 }
1744 
1745 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1746 {
1747   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1748   Mat            A    = mat->A,B = mat->B;
1749   PetscErrorCode ierr;
1750   PetscReal      isend[5],irecv[5];
1751 
1752   PetscFunctionBegin;
1753   info->block_size = 1.0;
1754   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1755 
1756   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1757   isend[3] = info->memory;  isend[4] = info->mallocs;
1758 
1759   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1760 
1761   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1762   isend[3] += info->memory;  isend[4] += info->mallocs;
1763   if (flag == MAT_LOCAL) {
1764     info->nz_used      = isend[0];
1765     info->nz_allocated = isend[1];
1766     info->nz_unneeded  = isend[2];
1767     info->memory       = isend[3];
1768     info->mallocs      = isend[4];
1769   } else if (flag == MAT_GLOBAL_MAX) {
1770     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1771 
1772     info->nz_used      = irecv[0];
1773     info->nz_allocated = irecv[1];
1774     info->nz_unneeded  = irecv[2];
1775     info->memory       = irecv[3];
1776     info->mallocs      = irecv[4];
1777   } else if (flag == MAT_GLOBAL_SUM) {
1778     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1779 
1780     info->nz_used      = irecv[0];
1781     info->nz_allocated = irecv[1];
1782     info->nz_unneeded  = irecv[2];
1783     info->memory       = irecv[3];
1784     info->mallocs      = irecv[4];
1785   }
1786   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1787   info->fill_ratio_needed = 0;
1788   info->factor_mallocs    = 0;
1789   PetscFunctionReturn(0);
1790 }
1791 
1792 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1793 {
1794   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1795   PetscErrorCode ierr;
1796 
1797   PetscFunctionBegin;
1798   switch (op) {
1799   case MAT_NEW_NONZERO_LOCATIONS:
1800   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1801   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1802   case MAT_KEEP_NONZERO_PATTERN:
1803   case MAT_NEW_NONZERO_LOCATION_ERR:
1804   case MAT_USE_INODES:
1805   case MAT_IGNORE_ZERO_ENTRIES:
1806     MatCheckPreallocated(A,1);
1807     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1808     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1809     break;
1810   case MAT_ROW_ORIENTED:
1811     MatCheckPreallocated(A,1);
1812     a->roworiented = flg;
1813 
1814     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1815     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1816     break;
1817   case MAT_NEW_DIAGONALS:
1818     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1819     break;
1820   case MAT_IGNORE_OFF_PROC_ENTRIES:
1821     a->donotstash = flg;
1822     break;
1823   case MAT_SPD:
1824     A->spd_set = PETSC_TRUE;
1825     A->spd     = flg;
1826     if (flg) {
1827       A->symmetric                  = PETSC_TRUE;
1828       A->structurally_symmetric     = PETSC_TRUE;
1829       A->symmetric_set              = PETSC_TRUE;
1830       A->structurally_symmetric_set = PETSC_TRUE;
1831     }
1832     break;
1833   case MAT_SYMMETRIC:
1834     MatCheckPreallocated(A,1);
1835     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1836     break;
1837   case MAT_STRUCTURALLY_SYMMETRIC:
1838     MatCheckPreallocated(A,1);
1839     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1840     break;
1841   case MAT_HERMITIAN:
1842     MatCheckPreallocated(A,1);
1843     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1844     break;
1845   case MAT_SYMMETRY_ETERNAL:
1846     MatCheckPreallocated(A,1);
1847     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1848     break;
1849   case MAT_SUBMAT_SINGLEIS:
1850     A->submat_singleis = flg;
1851     break;
1852   case MAT_STRUCTURE_ONLY:
1853     /* The option is handled directly by MatSetOption() */
1854     break;
1855   default:
1856     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1857   }
1858   PetscFunctionReturn(0);
1859 }
1860 
1861 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1862 {
1863   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1864   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1865   PetscErrorCode ierr;
1866   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1867   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1868   PetscInt       *cmap,*idx_p;
1869 
1870   PetscFunctionBegin;
1871   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1872   mat->getrowactive = PETSC_TRUE;
1873 
1874   if (!mat->rowvalues && (idx || v)) {
1875     /*
1876         allocate enough space to hold information from the longest row.
1877     */
1878     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1879     PetscInt   max = 1,tmp;
1880     for (i=0; i<matin->rmap->n; i++) {
1881       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1882       if (max < tmp) max = tmp;
1883     }
1884     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1885   }
1886 
1887   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1888   lrow = row - rstart;
1889 
1890   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1891   if (!v)   {pvA = 0; pvB = 0;}
1892   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1893   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1894   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1895   nztot = nzA + nzB;
1896 
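  /* garray[] translates the compressed local column indices of the off-diagonal block B
     into global column numbers */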
1897   cmap = mat->garray;
1898   if (v  || idx) {
1899     if (nztot) {
1900       /* Sort by increasing column numbers, assuming A and B already sorted */
1901       PetscInt imark = -1;
1902       if (v) {
1903         *v = v_p = mat->rowvalues;
1904         for (i=0; i<nzB; i++) {
1905           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1906           else break;
1907         }
1908         imark = i;
1909         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1910         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1911       }
1912       if (idx) {
1913         *idx = idx_p = mat->rowindices;
1914         if (imark > -1) {
1915           for (i=0; i<imark; i++) {
1916             idx_p[i] = cmap[cworkB[i]];
1917           }
1918         } else {
1919           for (i=0; i<nzB; i++) {
1920             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1921             else break;
1922           }
1923           imark = i;
1924         }
1925         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1926         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1927       }
1928     } else {
1929       if (idx) *idx = 0;
1930       if (v)   *v   = 0;
1931     }
1932   }
1933   *nz  = nztot;
1934   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1935   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1936   PetscFunctionReturn(0);
1937 }
1938 
1939 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1940 {
1941   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1942 
1943   PetscFunctionBegin;
1944   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1945   aij->getrowactive = PETSC_FALSE;
1946   PetscFunctionReturn(0);
1947 }
1948 
1949 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1950 {
1951   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1952   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1953   PetscErrorCode ierr;
1954   PetscInt       i,j,cstart = mat->cmap->rstart;
1955   PetscReal      sum = 0.0;
1956   MatScalar      *v;
1957 
1958   PetscFunctionBegin;
1959   if (aij->size == 1) {
1960     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1961   } else {
1962     if (type == NORM_FROBENIUS) {
1963       v = amat->a;
1964       for (i=0; i<amat->nz; i++) {
1965         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1966       }
1967       v = bmat->a;
1968       for (i=0; i<bmat->nz; i++) {
1969         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1970       }
1971       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1972       *norm = PetscSqrtReal(*norm);
1973       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1974     } else if (type == NORM_1) { /* max column norm */
1975       PetscReal *tmp,*tmp2;
1976       PetscInt  *jj,*garray = aij->garray;
1977       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1978       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1979       *norm = 0.0;
1980       v     = amat->a; jj = amat->j;
1981       for (j=0; j<amat->nz; j++) {
1982         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1983       }
1984       v = bmat->a; jj = bmat->j;
1985       for (j=0; j<bmat->nz; j++) {
1986         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1987       }
1988       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1989       for (j=0; j<mat->cmap->N; j++) {
1990         if (tmp2[j] > *norm) *norm = tmp2[j];
1991       }
1992       ierr = PetscFree(tmp);CHKERRQ(ierr);
1993       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1994       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1995     } else if (type == NORM_INFINITY) { /* max row norm */
1996       PetscReal ntemp = 0.0;
1997       for (j=0; j<aij->A->rmap->n; j++) {
1998         v   = amat->a + amat->i[j];
1999         sum = 0.0;
2000         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2001           sum += PetscAbsScalar(*v); v++;
2002         }
2003         v = bmat->a + bmat->i[j];
2004         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2005           sum += PetscAbsScalar(*v); v++;
2006         }
2007         if (sum > ntemp) ntemp = sum;
2008       }
2009       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2010       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
2011     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
2012   }
2013   PetscFunctionReturn(0);
2014 }
2015 
2016 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2017 {
2018   Mat_MPIAIJ     *a    =(Mat_MPIAIJ*)A->data,*b;
2019   Mat_SeqAIJ     *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
2020   PetscInt       M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,*B_diag_ilen,*B_diag_i,i,ncol,A_diag_ncol;
2021   PetscErrorCode ierr;
2022   Mat            B,A_diag,*B_diag;
2023   MatScalar      *array;
2024 
2025   PetscFunctionBegin;
2026   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2027   ai = Aloc->i; aj = Aloc->j;
2028   bi = Bloc->i; bj = Bloc->j;
2029   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2030     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2031     PetscSFNode          *oloc;
2032     PETSC_UNUSED PetscSF sf;
2033 
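    /* Every local column of A becomes a local row of the transpose, so the diagonal-block row
       lengths d_nnz[] are obtained by counting column occurrences in the diagonal block of A;
       the counts for ghost columns (g_nnz[]) are summed back to their owning processes with a
       PetscSF to produce the off-diagonal row lengths o_nnz[] */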
2034     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2035     /* compute d_nnz for preallocation */
2036     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2037     for (i=0; i<ai[ma]; i++) {
2038       d_nnz[aj[i]]++;
2039     }
2040     /* compute local off-diagonal contributions */
2041     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2042     for (i=0; i<bi[mb]; i++) g_nnz[bj[i]]++;
2043     /* map those to global */
2044     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2045     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2046     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2047     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2048     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2049     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2050     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2051 
2052     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2053     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2054     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2055     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2056     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2057     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2058   } else {
2059     B    = *matout;
2060     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2061   }
2062 
2063   b           = (Mat_MPIAIJ*)B->data;
2064   A_diag      = a->A;
2065   B_diag      = &b->A;
2066   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2067   A_diag_ncol = A_diag->cmap->N;
2068   B_diag_ilen = sub_B_diag->ilen;
2069   B_diag_i    = sub_B_diag->i;
2070 
2071   /* Set ilen for diagonal of B */
2072   for (i=0; i<A_diag_ncol; i++) {
2073     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2074   }
2075 
2076   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
2077      very quickly (i.e., without using MatSetValues()), because all writes are local. */
2078   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2079 
2080   /* copy over the B part */
2081   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2082   array = Bloc->a;
2083   row   = A->rmap->rstart;
2084   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2085   cols_tmp = cols;
2086   for (i=0; i<mb; i++) {
2087     ncol = bi[i+1]-bi[i];
2088     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2089     row++;
2090     array += ncol; cols_tmp += ncol;
2091   }
2092   ierr = PetscFree(cols);CHKERRQ(ierr);
2093 
2094   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2095   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2096   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2097     *matout = B;
2098   } else {
2099     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2100   }
2101   PetscFunctionReturn(0);
2102 }
2103 
2104 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2105 {
2106   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2107   Mat            a    = aij->A,b = aij->B;
2108   PetscErrorCode ierr;
2109   PetscInt       s1,s2,s3;
2110 
2111   PetscFunctionBegin;
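  /* Computes mat <- diag(ll) * mat * diag(rr); ll scales the local rows of both blocks directly,
     while scaling the columns of the off-diagonal block requires the ghost entries of rr, which
     are gathered into aij->lvec by the scatter below */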
2112   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2113   if (rr) {
2114     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2115     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2116     /* Overlap communication with computation. */
2117     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2118   }
2119   if (ll) {
2120     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2121     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2122     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2123   }
2124   /* scale  the diagonal block */
2125   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2126 
2127   if (rr) {
2128     /* Do a scatter end and then right scale the off-diagonal block */
2129     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2130     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2131   }
2132   PetscFunctionReturn(0);
2133 }
2134 
2135 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2136 {
2137   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2138   PetscErrorCode ierr;
2139 
2140   PetscFunctionBegin;
2141   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2142   PetscFunctionReturn(0);
2143 }
2144 
2145 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2146 {
2147   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2148   Mat            a,b,c,d;
2149   PetscBool      flg;
2150   PetscErrorCode ierr;
2151 
2152   PetscFunctionBegin;
2153   a = matA->A; b = matA->B;
2154   c = matB->A; d = matB->B;
2155 
2156   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2157   if (flg) {
2158     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2159   }
2160   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2161   PetscFunctionReturn(0);
2162 }
2163 
2164 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2165 {
2166   PetscErrorCode ierr;
2167   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2168   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2169 
2170   PetscFunctionBegin;
2171   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2172   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2173     /* because of the column compression in the off-processor part of the matrix a->B,
2174        the number of columns in a->B and b->B may be different, hence we cannot call
2175        the MatCopy() directly on the two parts. If need be, we can provide a more
2176        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2177        then copying the submatrices */
2178     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2179   } else {
2180     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2181     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2182   }
2183   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2184   PetscFunctionReturn(0);
2185 }
2186 
2187 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2188 {
2189   PetscErrorCode ierr;
2190 
2191   PetscFunctionBegin;
2192   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2193   PetscFunctionReturn(0);
2194 }
2195 
2196 /*
2197    Computes the number of nonzeros per row needed for preallocation when X and Y
2198    have different nonzero structure.
2199 */
2200 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2201 {
2202   PetscInt       i,j,k,nzx,nzy;
2203 
2204   PetscFunctionBegin;
2205   /* Set the number of nonzeros in the new matrix */
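  /* nnz[i] becomes the size of the union of the (sorted) global column sets of row i of X and Y */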
2206   for (i=0; i<m; i++) {
2207     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2208     nzx = xi[i+1] - xi[i];
2209     nzy = yi[i+1] - yi[i];
2210     nnz[i] = 0;
2211     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2212       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2213       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2214       nnz[i]++;
2215     }
2216     for (; k<nzy; k++) nnz[i]++;
2217   }
2218   PetscFunctionReturn(0);
2219 }
2220 
2221 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2222 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2223 {
2224   PetscErrorCode ierr;
2225   PetscInt       m = Y->rmap->N;
2226   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2227   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2228 
2229   PetscFunctionBegin;
2230   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2231   PetscFunctionReturn(0);
2232 }
2233 
2234 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2235 {
2236   PetscErrorCode ierr;
2237   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2238   PetscBLASInt   bnz,one=1;
2239   Mat_SeqAIJ     *x,*y;
2240 
2241   PetscFunctionBegin;
2242   if (str == SAME_NONZERO_PATTERN) {
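    /* with identical nonzero patterns, Y += a*X reduces to a dense axpy on the stored value
       arrays of the diagonal and off-diagonal blocks */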
2243     PetscScalar alpha = a;
2244     x    = (Mat_SeqAIJ*)xx->A->data;
2245     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2246     y    = (Mat_SeqAIJ*)yy->A->data;
2247     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2248     x    = (Mat_SeqAIJ*)xx->B->data;
2249     y    = (Mat_SeqAIJ*)yy->B->data;
2250     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2251     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2252     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2253   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X are a subset of Y's */
2254     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2255   } else {
2256     Mat      B;
2257     PetscInt *nnz_d,*nnz_o;
2258     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2259     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2260     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2261     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2262     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2263     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2264     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2265     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2266     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2267     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2268     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2269     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2270     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2271     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2272   }
2273   PetscFunctionReturn(0);
2274 }
2275 
2276 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2277 
2278 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2279 {
2280 #if defined(PETSC_USE_COMPLEX)
2281   PetscErrorCode ierr;
2282   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2283 
2284   PetscFunctionBegin;
2285   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2286   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2287 #else
2288   PetscFunctionBegin;
2289 #endif
2290   PetscFunctionReturn(0);
2291 }
2292 
2293 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2294 {
2295   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2296   PetscErrorCode ierr;
2297 
2298   PetscFunctionBegin;
2299   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2300   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2301   PetscFunctionReturn(0);
2302 }
2303 
2304 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2305 {
2306   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2307   PetscErrorCode ierr;
2308 
2309   PetscFunctionBegin;
2310   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2311   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2312   PetscFunctionReturn(0);
2313 }
2314 
2315 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2316 {
2317   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2318   PetscErrorCode ierr;
2319   PetscInt       i,*idxb = 0;
2320   PetscScalar    *va,*vb;
2321   Vec            vtmp;
2322 
2323   PetscFunctionBegin;
2324   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2325   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2326   if (idx) {
2327     for (i=0; i<A->rmap->n; i++) {
2328       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2329     }
2330   }
2331 
2332   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2333   if (idx) {
2334     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2335   }
2336   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2337   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2338 
2339   for (i=0; i<A->rmap->n; i++) {
2340     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2341       va[i] = vb[i];
2342       if (idx) idx[i] = a->garray[idxb[i]];
2343     }
2344   }
2345 
2346   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2347   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2348   ierr = PetscFree(idxb);CHKERRQ(ierr);
2349   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2350   PetscFunctionReturn(0);
2351 }
2352 
2353 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2354 {
2355   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2356   PetscErrorCode ierr;
2357   PetscInt       i,*idxb = 0;
2358   PetscScalar    *va,*vb;
2359   Vec            vtmp;
2360 
2361   PetscFunctionBegin;
2362   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2363   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2364   if (idx) {
2365     for (i=0; i<A->rmap->n; i++) {
2366       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2367     }
2368   }
2369 
2370   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2371   if (idx) {
2372     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2373   }
2374   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2375   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2376 
2377   for (i=0; i<A->rmap->n; i++) {
2378     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2379       va[i] = vb[i];
2380       if (idx) idx[i] = a->garray[idxb[i]];
2381     }
2382   }
2383 
2384   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2385   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2386   ierr = PetscFree(idxb);CHKERRQ(ierr);
2387   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2388   PetscFunctionReturn(0);
2389 }
2390 
2391 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2392 {
2393   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2394   PetscInt       n      = A->rmap->n;
2395   PetscInt       cstart = A->cmap->rstart;
2396   PetscInt       *cmap  = mat->garray;
2397   PetscInt       *diagIdx, *offdiagIdx;
2398   Vec            diagV, offdiagV;
2399   PetscScalar    *a, *diagA, *offdiagA;
2400   PetscInt       r;
2401   PetscErrorCode ierr;
2402 
2403   PetscFunctionBegin;
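  /* compute the per-row minima of the diagonal and off-diagonal blocks separately, then pick one
     per row below; the winning local column index is made global with the cstart offset
     (diagonal block) or through cmap[] = garray (off-diagonal block) */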
2404   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2405   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2406   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2407   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2408   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2409   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2410   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2411   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2412   for (r = 0; r < n; ++r) {
2413     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2414       a[r]   = diagA[r];
2415       idx[r] = cstart + diagIdx[r];
2416     } else {
2417       a[r]   = offdiagA[r];
2418       idx[r] = cmap[offdiagIdx[r]];
2419     }
2420   }
2421   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2422   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2423   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2424   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2425   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2426   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2427   PetscFunctionReturn(0);
2428 }
2429 
2430 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2431 {
2432   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2433   PetscInt       n      = A->rmap->n;
2434   PetscInt       cstart = A->cmap->rstart;
2435   PetscInt       *cmap  = mat->garray;
2436   PetscInt       *diagIdx, *offdiagIdx;
2437   Vec            diagV, offdiagV;
2438   PetscScalar    *a, *diagA, *offdiagA;
2439   PetscInt       r;
2440   PetscErrorCode ierr;
2441 
2442   PetscFunctionBegin;
2443   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2444   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2445   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2446   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2447   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2448   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2449   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2450   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2451   for (r = 0; r < n; ++r) {
2452     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2453       a[r]   = diagA[r];
2454       idx[r] = cstart + diagIdx[r];
2455     } else {
2456       a[r]   = offdiagA[r];
2457       idx[r] = cmap[offdiagIdx[r]];
2458     }
2459   }
2460   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2461   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2462   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2463   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2464   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2465   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2466   PetscFunctionReturn(0);
2467 }
2468 
2469 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2470 {
2471   PetscErrorCode ierr;
2472   Mat            *dummy;
2473 
2474   PetscFunctionBegin;
2475   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2476   *newmat = *dummy;
2477   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2478   PetscFunctionReturn(0);
2479 }
2480 
2481 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2482 {
2483   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2484   PetscErrorCode ierr;
2485 
2486   PetscFunctionBegin;
2487   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2488   A->factorerrortype = a->A->factorerrortype;
2489   PetscFunctionReturn(0);
2490 }
2491 
2492 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2493 {
2494   PetscErrorCode ierr;
2495   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2496 
2497   PetscFunctionBegin;
2498   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2499   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2500   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2501   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2502   PetscFunctionReturn(0);
2503 }
2504 
2505 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2506 {
2507   PetscFunctionBegin;
2508   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2509   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2510   PetscFunctionReturn(0);
2511 }
2512 
2513 /*@
2514    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2515 
2516    Collective on Mat
2517 
2518    Input Parameters:
2519 +    A - the matrix
2520 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2521 
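   Options Database Keys:
.  -mat_increase_overlap_scalable - use the scalable algorithm when MatIncreaseOverlap() is called
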
2522    Level: advanced
2523 
2524 @*/
2525 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2526 {
2527   PetscErrorCode       ierr;
2528 
2529   PetscFunctionBegin;
2530   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2531   PetscFunctionReturn(0);
2532 }
2533 
2534 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2535 {
2536   PetscErrorCode       ierr;
2537   PetscBool            sc = PETSC_FALSE,flg;
2538 
2539   PetscFunctionBegin;
2540   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2541   ierr = PetscObjectOptionsBegin((PetscObject)A);CHKERRQ(ierr);
2542   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2543   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2544   if (flg) {
2545     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2546   }
2547   ierr = PetscOptionsTail();CHKERRQ(ierr);
2548   PetscFunctionReturn(0);
2549 }
2550 
2551 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2552 {
2553   PetscErrorCode ierr;
2554   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2555   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2556 
2557   PetscFunctionBegin;
2558   if (!Y->preallocated) {
2559     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2560   } else if (!aij->nz) {
2561     PetscInt nonew = aij->nonew;
2562     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2563     aij->nonew = nonew;
2564   }
2565   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2566   PetscFunctionReturn(0);
2567 }
2568 
2569 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2570 {
2571   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2572   PetscErrorCode ierr;
2573 
2574   PetscFunctionBegin;
2575   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2576   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2577   if (d) {
2578     PetscInt rstart;
2579     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2580     *d += rstart;
2581 
2582   }
2583   PetscFunctionReturn(0);
2584 }
2585 
2586 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2587 {
2588   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2589   PetscErrorCode ierr;
2590 
2591   PetscFunctionBegin;
2592   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2593   PetscFunctionReturn(0);
2594 }
2595 
2596 /* -------------------------------------------------------------------*/
2597 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2598                                        MatGetRow_MPIAIJ,
2599                                        MatRestoreRow_MPIAIJ,
2600                                        MatMult_MPIAIJ,
2601                                 /* 4*/ MatMultAdd_MPIAIJ,
2602                                        MatMultTranspose_MPIAIJ,
2603                                        MatMultTransposeAdd_MPIAIJ,
2604                                        0,
2605                                        0,
2606                                        0,
2607                                 /*10*/ 0,
2608                                        0,
2609                                        0,
2610                                        MatSOR_MPIAIJ,
2611                                        MatTranspose_MPIAIJ,
2612                                 /*15*/ MatGetInfo_MPIAIJ,
2613                                        MatEqual_MPIAIJ,
2614                                        MatGetDiagonal_MPIAIJ,
2615                                        MatDiagonalScale_MPIAIJ,
2616                                        MatNorm_MPIAIJ,
2617                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2618                                        MatAssemblyEnd_MPIAIJ,
2619                                        MatSetOption_MPIAIJ,
2620                                        MatZeroEntries_MPIAIJ,
2621                                 /*24*/ MatZeroRows_MPIAIJ,
2622                                        0,
2623                                        0,
2624                                        0,
2625                                        0,
2626                                 /*29*/ MatSetUp_MPIAIJ,
2627                                        0,
2628                                        0,
2629                                        MatGetDiagonalBlock_MPIAIJ,
2630                                        0,
2631                                 /*34*/ MatDuplicate_MPIAIJ,
2632                                        0,
2633                                        0,
2634                                        0,
2635                                        0,
2636                                 /*39*/ MatAXPY_MPIAIJ,
2637                                        MatCreateSubMatrices_MPIAIJ,
2638                                        MatIncreaseOverlap_MPIAIJ,
2639                                        MatGetValues_MPIAIJ,
2640                                        MatCopy_MPIAIJ,
2641                                 /*44*/ MatGetRowMax_MPIAIJ,
2642                                        MatScale_MPIAIJ,
2643                                        MatShift_MPIAIJ,
2644                                        MatDiagonalSet_MPIAIJ,
2645                                        MatZeroRowsColumns_MPIAIJ,
2646                                 /*49*/ MatSetRandom_MPIAIJ,
2647                                        0,
2648                                        0,
2649                                        0,
2650                                        0,
2651                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2652                                        0,
2653                                        MatSetUnfactored_MPIAIJ,
2654                                        MatPermute_MPIAIJ,
2655                                        0,
2656                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2657                                        MatDestroy_MPIAIJ,
2658                                        MatView_MPIAIJ,
2659                                        0,
2660                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2661                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2662                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2663                                        0,
2664                                        0,
2665                                        0,
2666                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2667                                        MatGetRowMinAbs_MPIAIJ,
2668                                        0,
2669                                        0,
2670                                        0,
2671                                        0,
2672                                 /*75*/ MatFDColoringApply_AIJ,
2673                                        MatSetFromOptions_MPIAIJ,
2674                                        0,
2675                                        0,
2676                                        MatFindZeroDiagonals_MPIAIJ,
2677                                 /*80*/ 0,
2678                                        0,
2679                                        0,
2680                                 /*83*/ MatLoad_MPIAIJ,
2681                                        MatIsSymmetric_MPIAIJ,
2682                                        0,
2683                                        0,
2684                                        0,
2685                                        0,
2686                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2687                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2688                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2689                                        MatPtAP_MPIAIJ_MPIAIJ,
2690                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2691                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2692                                        0,
2693                                        0,
2694                                        0,
2695                                        0,
2696                                 /*99*/ 0,
2697                                        0,
2698                                        0,
2699                                        MatConjugate_MPIAIJ,
2700                                        0,
2701                                 /*104*/MatSetValuesRow_MPIAIJ,
2702                                        MatRealPart_MPIAIJ,
2703                                        MatImaginaryPart_MPIAIJ,
2704                                        0,
2705                                        0,
2706                                 /*109*/0,
2707                                        0,
2708                                        MatGetRowMin_MPIAIJ,
2709                                        0,
2710                                        MatMissingDiagonal_MPIAIJ,
2711                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2712                                        0,
2713                                        MatGetGhosts_MPIAIJ,
2714                                        0,
2715                                        0,
2716                                 /*119*/0,
2717                                        0,
2718                                        0,
2719                                        0,
2720                                        MatGetMultiProcBlock_MPIAIJ,
2721                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2722                                        MatGetColumnNorms_MPIAIJ,
2723                                        MatInvertBlockDiagonal_MPIAIJ,
2724                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2725                                        MatCreateSubMatricesMPI_MPIAIJ,
2726                                 /*129*/0,
2727                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2728                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2729                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2730                                        0,
2731                                 /*134*/0,
2732                                        0,
2733                                        MatRARt_MPIAIJ_MPIAIJ,
2734                                        0,
2735                                        0,
2736                                 /*139*/MatSetBlockSizes_MPIAIJ,
2737                                        0,
2738                                        0,
2739                                        MatFDColoringSetUp_MPIXAIJ,
2740                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2741                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2742 };
2743 
2744 /* ----------------------------------------------------------------------------------------*/
2745 
2746 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2747 {
2748   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2749   PetscErrorCode ierr;
2750 
2751   PetscFunctionBegin;
2752   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2753   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2754   PetscFunctionReturn(0);
2755 }
2756 
2757 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2758 {
2759   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2760   PetscErrorCode ierr;
2761 
2762   PetscFunctionBegin;
2763   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2764   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2765   PetscFunctionReturn(0);
2766 }
2767 
2768 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2769 {
2770   Mat_MPIAIJ     *b;
2771   PetscErrorCode ierr;
2772 
2773   PetscFunctionBegin;
2774   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2775   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2776   b = (Mat_MPIAIJ*)B->data;
2777 
2778 #if defined(PETSC_USE_CTABLE)
2779   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2780 #else
2781   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2782 #endif
2783   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2784   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2785   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2786 
2787   /* Because B will have been resized, we simply destroy it and create a new one each time */
2788   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2789   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2790   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2791   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2792   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2793   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2794 
2795   if (!B->preallocated) {
2796     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2797     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2798     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2799     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2800     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2801   }
2802 
2803   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2804   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2805   B->preallocated  = PETSC_TRUE;
2806   B->was_assembled = PETSC_FALSE;
2807   B->assembled     = PETSC_FALSE;
2808   PetscFunctionReturn(0);
2809 }
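/*
   Editorial usage sketch (not part of the PETSc source): the caller-side preallocation
   sequence that ends up in MatMPIAIJSetPreallocation_MPIAIJ() above. The per-row counts
   (5 diagonal-block and 2 off-diagonal-block nonzeros) are made-up placeholders.

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
     (then MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd())
*/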
2810 
2811 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2812 {
2813   Mat_MPIAIJ     *b;
2814   PetscErrorCode ierr;
2815 
2816   PetscFunctionBegin;
2817   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2818   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2819   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2820   b = (Mat_MPIAIJ*)B->data;
2821 
2822 #if defined(PETSC_USE_CTABLE)
2823   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2824 #else
2825   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2826 #endif
2827   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2828   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2829   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2830 
2831   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2832   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2833   B->preallocated  = PETSC_TRUE;
2834   B->was_assembled = PETSC_FALSE;
2835   B->assembled = PETSC_FALSE;
2836   PetscFunctionReturn(0);
2837 }
2838 
2839 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2840 {
2841   Mat            mat;
2842   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2843   PetscErrorCode ierr;
2844 
2845   PetscFunctionBegin;
2846   *newmat = 0;
2847   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2848   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2849   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2850   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2851   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2852   a       = (Mat_MPIAIJ*)mat->data;
2853 
2854   mat->factortype   = matin->factortype;
2855   mat->assembled    = PETSC_TRUE;
2856   mat->insertmode   = NOT_SET_VALUES;
2857   mat->preallocated = PETSC_TRUE;
2858 
2859   a->size         = oldmat->size;
2860   a->rank         = oldmat->rank;
2861   a->donotstash   = oldmat->donotstash;
2862   a->roworiented  = oldmat->roworiented;
2863   a->rowindices   = 0;
2864   a->rowvalues    = 0;
2865   a->getrowactive = PETSC_FALSE;
2866 
2867   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2868   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2869 
2870   if (oldmat->colmap) {
2871 #if defined(PETSC_USE_CTABLE)
2872     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2873 #else
2874     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2875     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2876     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2877 #endif
2878   } else a->colmap = 0;
2879   if (oldmat->garray) {
2880     PetscInt len;
2881     len  = oldmat->B->cmap->n;
2882     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2883     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2884     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2885   } else a->garray = 0;
2886 
2887   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2888   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2889   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2890   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2891 
2892   if (oldmat->Mvctx_mpi1) {
2893     ierr    = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2894     ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2895   }
2896 
2897   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2898   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2899   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2900   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2901   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2902   *newmat = mat;
2903   PetscFunctionReturn(0);
2904 }
2905 
2906 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2907 {
2908   PetscScalar    *vals,*svals;
2909   MPI_Comm       comm;
2910   PetscErrorCode ierr;
2911   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2912   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2913   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2914   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2915   PetscInt       cend,cstart,n,*rowners;
2916   int            fd;
2917   PetscInt       bs = newMat->rmap->bs;
2918 
2919   PetscFunctionBegin;
2920   /* force binary viewer to load .info file if it has not yet done so */
2921   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2922   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2923   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2924   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2925   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2926   if (!rank) {
2927     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2928     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2929     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2930   }
2931 
2932   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2933   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2934   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2935   if (bs < 0) bs = 1;
2936 
2937   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2938   M    = header[1]; N = header[2];
2939 
2940   /* If global sizes are set, check if they are consistent with that given in the file */
2941   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2942   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2943 
2944   /* determine ownership of all (block) rows */
2945   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2946   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2947   else m = newMat->rmap->n; /* Set by user */
2948 
2949   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2950   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2951 
2952   /* First process needs enough room for process with most rows */
2953   if (!rank) {
2954     mmax = rowners[1];
2955     for (i=2; i<=size; i++) {
2956       mmax = PetscMax(mmax, rowners[i]);
2957     }
2958   } else mmax = -1;             /* unused, but compilers complain */
2959 
2960   rowners[0] = 0;
2961   for (i=2; i<=size; i++) {
2962     rowners[i] += rowners[i-1];
2963   }
2964   rstart = rowners[rank];
2965   rend   = rowners[rank+1];
2966 
2967   /* distribute row lengths to all processors */
2968   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2969   if (!rank) {
2970     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2971     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2972     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2973     for (j=0; j<m; j++) {
2974       procsnz[0] += ourlens[j];
2975     }
2976     for (i=1; i<size; i++) {
2977       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2978       /* calculate the number of nonzeros on each processor */
2979       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2980         procsnz[i] += rowlengths[j];
2981       }
2982       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2983     }
2984     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2985   } else {
2986     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2987   }
2988 
2989   if (!rank) {
2990     /* determine max buffer needed and allocate it */
2991     maxnz = 0;
2992     for (i=0; i<size; i++) {
2993       maxnz = PetscMax(maxnz,procsnz[i]);
2994     }
2995     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2996 
2997     /* read in my part of the matrix column indices  */
2998     nz   = procsnz[0];
2999     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3000     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3001 
3002     /* read in everyone else's portion and ship it off */
3003     for (i=1; i<size; i++) {
3004       nz   = procsnz[i];
3005       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3006       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3007     }
3008     ierr = PetscFree(cols);CHKERRQ(ierr);
3009   } else {
3010     /* determine buffer space needed for message */
3011     nz = 0;
3012     for (i=0; i<m; i++) {
3013       nz += ourlens[i];
3014     }
3015     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3016 
3017     /* receive message of column indices*/
3018     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3019   }
3020 
3021   /* determine column ownership if matrix is not square */
3022   if (N != M) {
3023     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3024     else n = newMat->cmap->n;
3025     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3026     cstart = cend - n;
3027   } else {
3028     cstart = rstart;
3029     cend   = rend;
3030     n      = cend - cstart;
3031   }
3032 
3033   /* loop over local rows, determining number of off diagonal entries */
3034   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3035   jj   = 0;
3036   for (i=0; i<m; i++) {
3037     for (j=0; j<ourlens[i]; j++) {
3038       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3039       jj++;
3040     }
3041   }
3042 
3043   for (i=0; i<m; i++) {
3044     ourlens[i] -= offlens[i];
3045   }
3046   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3047 
3048   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3049 
3050   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3051 
3052   for (i=0; i<m; i++) {
3053     ourlens[i] += offlens[i];
3054   }
3055 
3056   if (!rank) {
3057     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3058 
3059     /* read in my part of the matrix numerical values  */
3060     nz   = procsnz[0];
3061     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3062 
3063     /* insert into matrix */
3064     jj      = rstart;
3065     smycols = mycols;
3066     svals   = vals;
3067     for (i=0; i<m; i++) {
3068       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3069       smycols += ourlens[i];
3070       svals   += ourlens[i];
3071       jj++;
3072     }
3073 
3074     /* read in other processors and ship out */
3075     for (i=1; i<size; i++) {
3076       nz   = procsnz[i];
3077       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3078       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3079     }
3080     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3081   } else {
3082     /* receive numeric values */
3083     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3084 
3085     /* receive message of values*/
3086     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3087 
3088     /* insert into matrix */
3089     jj      = rstart;
3090     smycols = mycols;
3091     svals   = vals;
3092     for (i=0; i<m; i++) {
3093       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3094       smycols += ourlens[i];
3095       svals   += ourlens[i];
3096       jj++;
3097     }
3098   }
3099   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3100   ierr = PetscFree(vals);CHKERRQ(ierr);
3101   ierr = PetscFree(mycols);CHKERRQ(ierr);
3102   ierr = PetscFree(rowners);CHKERRQ(ierr);
3103   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3104   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3105   PetscFunctionReturn(0);
3106 }
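/*
   Editorial usage sketch (not part of the PETSc source): loading a matrix stored in PETSc
   binary format into a MATMPIAIJ matrix; MatLoad() dispatches to MatLoad_MPIAIJ() above.
   The file name "matrix.dat" is a placeholder.

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

   The block size used when distributing the rows can be overridden at runtime with
   -matload_block_size <bs>, which is read above with PetscOptionsInt().
*/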
3107 
3108 /* Not scalable because of ISAllGather() unless getting all columns. */
3109 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3110 {
3111   PetscErrorCode ierr;
3112   IS             iscol_local;
3113   PetscBool      isstride;
3114   PetscMPIInt    lisstride=0,gisstride;
3115 
3116   PetscFunctionBegin;
3117   /* check if we are grabbing all columns*/
3118   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3119 
3120   if (isstride) {
3121     PetscInt  start,len,mstart,mlen;
3122     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3123     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3124     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3125     if (mstart == start && mlen-mstart == len) lisstride = 1;
3126   }
3127 
3128   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3129   if (gisstride) {
3130     PetscInt N;
3131     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3132     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3133     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3134     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3135   } else {
3136     PetscInt cbs;
3137     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3138     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3139     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3140   }
3141 
3142   *isseq = iscol_local;
3143   PetscFunctionReturn(0);
3144 }
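/*
   Editorial sketch (not part of the PETSc source): the fast path above is taken when every
   process requests exactly its owned column range, for example

     ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),cend-cstart,cstart,1,&iscol);CHKERRQ(ierr);

   in which case ISGetSeqIS_Private() returns an identity stride over all columns and the
   ISAllGather() communication is skipped.
*/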
3145 
3146 /*
3147  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local with the global size of iscol
3148  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3149 
3150  Input Parameters:
3151    mat - matrix
3152    isrow - parallel row index set; its local indices are a subset of the local rows of mat,
3153            i.e., mat->rstart <= isrow[i] < mat->rend
3154    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3155            i.e., mat->cstart <= iscol[i] < mat->cend
3156  Output Parameter:
3157    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3158    iscol_o - sequential column index set for retrieving mat->B
3159    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3160  */
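/*
   Editorial worked sketch (made-up data, not from the source): two processes, an 8x8 matrix
   with columns 0-3 owned by rank 0 and 4-7 by rank 1, and suppose the column map a->garray of
   rank 0's off-diagonal block B is {4,6,7}. Take iscol = {1,3} on rank 0 and {5,6} on rank 1,
   so the concatenated iscol is {1,3,5,6}. On rank 0 the outputs would be
     iscol_d = {1,3}   (local indices of the selected owned columns)
     iscol_o = {1}     (column 1 of B corresponds to global column 6, which is selected)
     garray  = {3}     (global column 6 sits at position 3 of the concatenated iscol)
*/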
3161 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3162 {
3163   PetscErrorCode ierr;
3164   Vec            x,cmap;
3165   const PetscInt *is_idx;
3166   PetscScalar    *xarray,*cmaparray;
3167   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3168   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3169   Mat            B=a->B;
3170   Vec            lvec=a->lvec,lcmap;
3171   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3172   MPI_Comm       comm;
3173   VecScatter     Mvctx=a->Mvctx;
3174 
3175   PetscFunctionBegin;
3176   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3177   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3178 
3179   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3180   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3181   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3182   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3183   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3184 
3185   /* Get start indices */
3186   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3187   isstart -= ncols;
3188   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3189 
3190   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3191   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3192   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3193   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3194   for (i=0; i<ncols; i++) {
3195     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3196     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3197     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3198   }
3199   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3200   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3201   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3202 
3203   /* Get iscol_d */
3204   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3205   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3206   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3207 
3208   /* Get isrow_d */
3209   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3210   rstart = mat->rmap->rstart;
3211   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3212   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3213   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3214   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3215 
3216   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3217   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3218   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3219 
3220   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3221   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3222   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3223 
3224   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3225 
3226   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3227   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3228 
3229   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3230   /* off-process column indices */
3231   count = 0;
3232   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3233   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3234 
3235   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3236   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3237   for (i=0; i<Bn; i++) {
3238     if (PetscRealPart(xarray[i]) > -1.0) {
3239       idx[count]     = i;                   /* local column index in off-diagonal part B */
3240       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3241       count++;
3242     }
3243   }
3244   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3245   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3246 
3247   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3248   /* cannot ensure iscol_o has same blocksize as iscol! */
3249 
3250   ierr = PetscFree(idx);CHKERRQ(ierr);
3251   *garray = cmap1;
3252 
3253   ierr = VecDestroy(&x);CHKERRQ(ierr);
3254   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3255   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3256   PetscFunctionReturn(0);
3257 }
3258 
3259 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3260 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3261 {
3262   PetscErrorCode ierr;
3263   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3264   Mat            M = NULL;
3265   MPI_Comm       comm;
3266   IS             iscol_d,isrow_d,iscol_o;
3267   Mat            Asub = NULL,Bsub = NULL;
3268   PetscInt       n;
3269 
3270   PetscFunctionBegin;
3271   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3272 
3273   if (call == MAT_REUSE_MATRIX) {
3274     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3275     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3276     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3277 
3278     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3279     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3280 
3281     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3282     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3283 
3284     /* Update diagonal and off-diagonal portions of submat */
3285     asub = (Mat_MPIAIJ*)(*submat)->data;
3286     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3287     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3288     if (n) {
3289       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3290     }
3291     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3292     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3293 
3294   } else { /* call == MAT_INITIAL_MATRIX) */
3295     const PetscInt *garray;
3296     PetscInt        BsubN;
3297 
3298     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3299     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3300 
3301     /* Create local submatrices Asub and Bsub */
3302     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3303     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3304 
3305     /* Create submatrix M */
3306     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3307 
3308     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3309     asub = (Mat_MPIAIJ*)M->data;
3310 
3311     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3312     n = asub->B->cmap->N;
3313     if (BsubN > n) {
3314       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3315       const PetscInt *idx;
3316       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3317       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3318 
3319       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3320       j = 0;
3321       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3322       for (i=0; i<n; i++) {
3323         if (j >= BsubN) break;
3324         while (subgarray[i] > garray[j]) j++;
3325 
3326         if (subgarray[i] == garray[j]) {
3327           idx_new[i] = idx[j++];
3328         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be less than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3329       }
3330       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3331 
3332       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3333       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3334 
3335     } else if (BsubN < n) {
3336       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be smaller than B's (%D)",BsubN,asub->B->cmap->N);
3337     }
3338 
3339     ierr = PetscFree(garray);CHKERRQ(ierr);
3340     *submat = M;
3341 
3342     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3343     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3344     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3345 
3346     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3347     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3348 
3349     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3350     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3351   }
3352   PetscFunctionReturn(0);
3353 }
3354 
3355 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3356 {
3357   PetscErrorCode ierr;
3358   IS             iscol_local=NULL,isrow_d;
3359   PetscInt       csize;
3360   PetscInt       n,i,j,start,end;
3361   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3362   MPI_Comm       comm;
3363 
3364   PetscFunctionBegin;
3365   /* If isrow has same processor distribution as mat,
3366      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3367   if (call == MAT_REUSE_MATRIX) {
3368     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3369     if (isrow_d) {
3370       sameRowDist  = PETSC_TRUE;
3371       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3372     } else {
3373       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3374       if (iscol_local) {
3375         sameRowDist  = PETSC_TRUE;
3376         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3377       }
3378     }
3379   } else {
3380     /* Check if isrow has same processor distribution as mat */
3381     sameDist[0] = PETSC_FALSE;
3382     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3383     if (!n) {
3384       sameDist[0] = PETSC_TRUE;
3385     } else {
3386       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3387       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3388       if (i >= start && j < end) {
3389         sameDist[0] = PETSC_TRUE;
3390       }
3391     }
3392 
3393     /* Check if iscol has same processor distribution as mat */
3394     sameDist[1] = PETSC_FALSE;
3395     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3396     if (!n) {
3397       sameDist[1] = PETSC_TRUE;
3398     } else {
3399       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3400       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3401       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3402     }
3403 
3404     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3405     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3406     sameRowDist = tsameDist[0];
3407   }
3408 
3409   if (sameRowDist) {
3410     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3411       /* isrow and iscol have same processor distribution as mat */
3412       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3413       PetscFunctionReturn(0);
3414     } else { /* sameRowDist */
3415       /* isrow has same processor distribution as mat */
3416       if (call == MAT_INITIAL_MATRIX) {
3417         PetscBool sorted;
3418         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3419         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3420         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3421         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3422 
3423         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3424         if (sorted) {
3425           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3426           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3427           PetscFunctionReturn(0);
3428         }
3429       } else { /* call == MAT_REUSE_MATRIX */
3430         IS    iscol_sub;
3431         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3432         if (iscol_sub) {
3433           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3434           PetscFunctionReturn(0);
3435         }
3436       }
3437     }
3438   }
3439 
3440   /* General case: iscol -> iscol_local which has global size of iscol */
3441   if (call == MAT_REUSE_MATRIX) {
3442     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3443     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3444   } else {
3445     if (!iscol_local) {
3446       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3447     }
3448   }
3449 
3450   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3451   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3452 
3453   if (call == MAT_INITIAL_MATRIX) {
3454     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3455     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3456   }
3457   PetscFunctionReturn(0);
3458 }
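/*
   Editorial usage sketch (not part of the PETSc source): extracting a parallel submatrix.
   When every process requests only rows and columns it owns, the SameRowColDist branch above
   is taken and no globally sized index set has to be gathered. A, isrow, iscol and Asub are
   placeholders; here each process simply selects its whole owned range.

     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = MatGetOwnershipRangeColumn(A,&cstart,&cend);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,cend-cstart,cstart,1,&iscol);CHKERRQ(ierr);
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&Asub);CHKERRQ(ierr);    <-- after the values of A change
*/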
3459 
3460 /*@C
3461      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3462          and "off-diagonal" part of the matrix in CSR format.
3463 
3464    Collective on MPI_Comm
3465 
3466    Input Parameters:
3467 +  comm - MPI communicator
3468 .  A - "diagonal" portion of matrix
3469 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3470 -  garray - global index of B columns
3471 
3472    Output Parameter:
3473 .   mat - the matrix, with input A as its local diagonal matrix

3474    Level: advanced
3475 
3476    Notes:
3477        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3478        A becomes part of the output mat and B is destroyed by this routine; the user cannot use A or B afterwards.
3479 
3480 .seealso: MatCreateMPIAIJWithSplitArrays()
3481 @*/
3482 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3483 {
3484   PetscErrorCode ierr;
3485   Mat_MPIAIJ     *maij;
3486   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3487   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3488   PetscScalar    *oa=b->a;
3489   Mat            Bnew;
3490   PetscInt       m,n,N;
3491 
3492   PetscFunctionBegin;
3493   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3494   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3495   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3496   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3497   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3498   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3499 
3500   /* Get global columns of mat */
3501   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3502 
3503   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3504   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3505   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3506   maij = (Mat_MPIAIJ*)(*mat)->data;
3507 
3508   (*mat)->preallocated = PETSC_TRUE;
3509 
3510   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3511   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3512 
3513   /* Set A as diagonal portion of *mat */
3514   maij->A = A;
3515 
3516   nz = oi[m];
3517   for (i=0; i<nz; i++) {
3518     col   = oj[i];
3519     oj[i] = garray[col];
3520   }
3521 
3522    /* Set Bnew as off-diagonal portion of *mat */
3523   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3524   bnew        = (Mat_SeqAIJ*)Bnew->data;
3525   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3526   maij->B     = Bnew;
3527 
3528   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3529 
3530   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3531   b->free_a       = PETSC_FALSE;
3532   b->free_ij      = PETSC_FALSE;
3533   ierr = MatDestroy(&B);CHKERRQ(ierr);
3534 
3535   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3536   bnew->free_a       = PETSC_TRUE;
3537   bnew->free_ij      = PETSC_TRUE;
3538 
3539   /* condense columns of maij->B */
3540   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3541   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3542   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3543   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3544   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3545   PetscFunctionReturn(0);
3546 }
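/*
   Editorial sketch (not part of the PETSc source): minimal shape of a call to the routine
   above. A is a sequential "diagonal" block, B a sequential "off-diagonal" block whose local
   column indices map to global columns through garray; all three are assumed to have been
   built already (for example by the SameRowColDist submatrix path). After the call both A
   and B belong to mat and must not be used again by the caller.

     ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,A,B,garray,&mat);CHKERRQ(ierr);
*/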
3547 
3548 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3549 
3550 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3551 {
3552   PetscErrorCode ierr;
3553   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3554   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3555   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3556   Mat            M,Msub,B=a->B;
3557   MatScalar      *aa;
3558   Mat_SeqAIJ     *aij;
3559   PetscInt       *garray = a->garray,*colsub,Ncols;
3560   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3561   IS             iscol_sub,iscmap;
3562   const PetscInt *is_idx,*cmap;
3563   PetscBool      allcolumns=PETSC_FALSE;
3564   MPI_Comm       comm;
3565 
3566   PetscFunctionBegin;
3567   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3568 
3569   if (call == MAT_REUSE_MATRIX) {
3570     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3571     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3572     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3573 
3574     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3575     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3576 
3577     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3578     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3579 
3580     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3581 
3582   } else { /* call == MAT_INITIAL_MATRIX */
3583     PetscBool flg;
3584 
3585     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3586     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3587 
3588     /* (1) iscol -> nonscalable iscol_local */
3589     /* Check for special case: each processor gets entire matrix columns */
3590     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3591     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3592     if (allcolumns) {
3593       iscol_sub = iscol_local;
3594       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3595       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3596 
3597     } else {
3598       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3599       PetscInt *idx,*cmap1,k;
3600       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3601       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3602       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3603       count = 0;
3604       k     = 0;
3605       for (i=0; i<Ncols; i++) {
3606         j = is_idx[i];
3607         if (j >= cstart && j < cend) {
3608           /* diagonal part of mat */
3609           idx[count]     = j;
3610           cmap1[count++] = i; /* column index in submat */
3611         } else if (Bn) {
3612           /* off-diagonal part of mat */
3613           if (j == garray[k]) {
3614             idx[count]     = j;
3615             cmap1[count++] = i;  /* column index in submat */
3616           } else if (j > garray[k]) {
3617             while (j > garray[k] && k < Bn-1) k++;
3618             if (j == garray[k]) {
3619               idx[count]     = j;
3620               cmap1[count++] = i; /* column index in submat */
3621             }
3622           }
3623         }
3624       }
3625       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3626 
3627       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3628       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3629       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3630 
3631       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3632     }
3633 
3634     /* (3) Create sequential Msub */
3635     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3636   }
3637 
3638   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3639   aij  = (Mat_SeqAIJ*)(Msub)->data;
3640   ii   = aij->i;
3641   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3642 
3643   /*
3644       m - number of local rows
3645       Ncols - number of columns (same on all processors)
3646       rstart - first row in new global matrix generated
3647   */
3648   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3649 
3650   if (call == MAT_INITIAL_MATRIX) {
3651     /* (4) Create parallel newmat */
3652     PetscMPIInt    rank,size;
3653     PetscInt       csize;
3654 
3655     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3656     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3657 
3658     /*
3659         Determine the number of non-zeros in the diagonal and off-diagonal
3660         portions of the matrix in order to do correct preallocation
3661     */
3662 
3663     /* first get start and end of "diagonal" columns */
3664     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3665     if (csize == PETSC_DECIDE) {
3666       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3667       if (mglobal == Ncols) { /* square matrix */
3668         nlocal = m;
3669       } else {
3670         nlocal = Ncols/size + ((Ncols % size) > rank);
3671       }
3672     } else {
3673       nlocal = csize;
3674     }
3675     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3676     rstart = rend - nlocal;
3677     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3678 
3679     /* next, compute all the lengths */
3680     jj    = aij->j;
3681     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3682     olens = dlens + m;
3683     for (i=0; i<m; i++) {
3684       jend = ii[i+1] - ii[i];
3685       olen = 0;
3686       dlen = 0;
3687       for (j=0; j<jend; j++) {
3688         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3689         else dlen++;
3690         jj++;
3691       }
3692       olens[i] = olen;
3693       dlens[i] = dlen;
3694     }
3695 
3696     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3697     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3698 
3699     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3700     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3701     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3702     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3703     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3704     ierr = PetscFree(dlens);CHKERRQ(ierr);
3705 
3706   } else { /* call == MAT_REUSE_MATRIX */
3707     M    = *newmat;
3708     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3709     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3710     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3711     /*
3712          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3713        rather than the slower MatSetValues().
3714     */
3715     M->was_assembled = PETSC_TRUE;
3716     M->assembled     = PETSC_FALSE;
3717   }
3718 
3719   /* (5) Set values of Msub to *newmat */
3720   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3721   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3722 
3723   jj   = aij->j;
3724   aa   = aij->a;
3725   for (i=0; i<m; i++) {
3726     row = rstart + i;
3727     nz  = ii[i+1] - ii[i];
3728     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3729     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3730     jj += nz; aa += nz;
3731   }
3732   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3733 
3734   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3735   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3736 
3737   ierr = PetscFree(colsub);CHKERRQ(ierr);
3738 
3739   /* save Msub, iscol_sub and iscmap used in processor for next request */
3740   if (call ==  MAT_INITIAL_MATRIX) {
3741     *newmat = M;
3742     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3743     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3744 
3745     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3746     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3747 
3748     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3749     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3750 
3751     if (iscol_local) {
3752       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3753       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3754     }
3755   }
3756   PetscFunctionReturn(0);
3757 }
3758 
3759 /*
3760     Not great since it makes two copies of the submatrix: first a SeqAIJ
3761   locally, and then the end result by concatenating the local matrices.
3762   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3763 
3764   Note: This requires a sequential iscol with all indices.
3765 */
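/*
   Editorial sketch (not part of the PETSc source): the sequential iscol with all indices that
   this routine requires is normally produced from the parallel iscol by the caller, e.g.

     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);

   as done in ISGetSeqIS_Private() above, and iscol_local is then passed in as iscol here.
*/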
3766 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3767 {
3768   PetscErrorCode ierr;
3769   PetscMPIInt    rank,size;
3770   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3771   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3772   Mat            M,Mreuse;
3773   MatScalar      *aa,*vwork;
3774   MPI_Comm       comm;
3775   Mat_SeqAIJ     *aij;
3776   PetscBool      colflag,allcolumns=PETSC_FALSE;
3777 
3778   PetscFunctionBegin;
3779   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3780   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3781   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3782 
3783   /* Check for special case: each processor gets entire matrix columns */
3784   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3785   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3786   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3787 
3788   if (call ==  MAT_REUSE_MATRIX) {
3789     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3790     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3791     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3792   } else {
3793     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3794   }
3795 
3796   /*
3797       m - number of local rows
3798       n - number of columns (same on all processors)
3799       rstart - first row in new global matrix generated
3800   */
3801   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3802   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3803   if (call == MAT_INITIAL_MATRIX) {
3804     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3805     ii  = aij->i;
3806     jj  = aij->j;
3807 
3808     /*
3809         Determine the number of non-zeros in the diagonal and off-diagonal
3810         portions of the matrix in order to do correct preallocation
3811     */
3812 
3813     /* first get start and end of "diagonal" columns */
3814     if (csize == PETSC_DECIDE) {
3815       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3816       if (mglobal == n) { /* square matrix */
3817         nlocal = m;
3818       } else {
3819         nlocal = n/size + ((n % size) > rank);
3820       }
3821     } else {
3822       nlocal = csize;
3823     }
3824     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3825     rstart = rend - nlocal;
3826     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3827 
3828     /* next, compute all the lengths */
3829     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3830     olens = dlens + m;
3831     for (i=0; i<m; i++) {
3832       jend = ii[i+1] - ii[i];
3833       olen = 0;
3834       dlen = 0;
3835       for (j=0; j<jend; j++) {
3836         if (*jj < rstart || *jj >= rend) olen++;
3837         else dlen++;
3838         jj++;
3839       }
3840       olens[i] = olen;
3841       dlens[i] = dlen;
3842     }
3843     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3844     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3845     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3846     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3847     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3848     ierr = PetscFree(dlens);CHKERRQ(ierr);
3849   } else {
3850     PetscInt ml,nl;
3851 
3852     M    = *newmat;
3853     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3854     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3855     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3856     /*
3857          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3858        rather than the slower MatSetValues().
3859     */
3860     M->was_assembled = PETSC_TRUE;
3861     M->assembled     = PETSC_FALSE;
3862   }
3863   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3864   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3865   ii   = aij->i;
3866   jj   = aij->j;
3867   aa   = aij->a;
3868   for (i=0; i<m; i++) {
3869     row   = rstart + i;
3870     nz    = ii[i+1] - ii[i];
3871     cwork = jj;     jj += nz;
3872     vwork = aa;     aa += nz;
3873     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3874   }
3875 
3876   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3877   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3878   *newmat = M;
3879 
3880   /* save submatrix used in processor for next request */
3881   if (call ==  MAT_INITIAL_MATRIX) {
3882     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3883     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3884   }
3885   PetscFunctionReturn(0);
3886 }
3887 
3888 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3889 {
3890   PetscInt       m,cstart, cend,j,nnz,i,d;
3891   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3892   const PetscInt *JJ;
3893   PetscScalar    *values;
3894   PetscErrorCode ierr;
3895   PetscBool      nooffprocentries;
3896 
3897   PetscFunctionBegin;
3898   if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3899 
3900   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3901   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3902   m      = B->rmap->n;
3903   cstart = B->cmap->rstart;
3904   cend   = B->cmap->rend;
3905   rstart = B->rmap->rstart;
3906 
3907   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3908 
3909 #if defined(PETSC_USE_DEBUG)
3910   for (i=0; i<m; i++) {
3911     nnz = Ii[i+1]- Ii[i];
3912     JJ  = J + Ii[i];
3913     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3914     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3915     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3916   }
3917 #endif
3918 
3919   for (i=0; i<m; i++) {
3920     nnz     = Ii[i+1]- Ii[i];
3921     JJ      = J + Ii[i];
3922     nnz_max = PetscMax(nnz_max,nnz);
3923     d       = 0;
3924     for (j=0; j<nnz; j++) {
3925       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3926     }
3927     d_nnz[i] = d;
3928     o_nnz[i] = nnz - d;
3929   }
3930   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3931   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3932 
3933   if (v) values = (PetscScalar*)v;
3934   else {
3935     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3936   }
3937 
3938   for (i=0; i<m; i++) {
3939     ii   = i + rstart;
3940     nnz  = Ii[i+1]- Ii[i];
3941     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3942   }
3943   nooffprocentries    = B->nooffprocentries;
3944   B->nooffprocentries = PETSC_TRUE;
3945   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3946   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3947   B->nooffprocentries = nooffprocentries;
3948 
3949   if (!v) {
3950     ierr = PetscFree(values);CHKERRQ(ierr);
3951   }
3952   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3953   PetscFunctionReturn(0);
3954 }
3955 
3956 /*@
3957    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3958    (the default parallel PETSc format).
3959 
3960    Collective on MPI_Comm
3961 
3962    Input Parameters:
3963 +  B - the matrix
3964 .  i - the indices into j for the start of each local row (starts with zero)
3965 .  j - the column indices for each local row (starts with zero)
3966 -  v - optional values in the matrix
3967 
3968    Level: developer
3969 
3970    Notes:
3971        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3972      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3973      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3974 
3975        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3976        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
3977 
3978        The format used for the sparse matrix input is equivalent to a
3979     row-major ordering, i.e., for the following matrix, the input data expected is
3980     as shown:
3981 $        1 0 0
3982 $        2 0 3     P0
3983 $       -------
3984 $        4 5 6     P1
3985 $
3986 $     Process0 [P0]: rows_owned=[0,1]
3987 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3988 $        j =  {0,0,2}  [size = 3]
3989 $        v =  {1,2,3}  [size = 3]
3990 $
3991 $     Process1 [P1]: rows_owned=[2]
3992 $        i =  {0,3}    [size = nrow+1  = 1+1]
3993 $        j =  {0,1,2}  [size = 3]
3994 $        v =  {4,5,6}  [size = 3]
3995 
3996 .keywords: matrix, aij, compressed row, sparse, parallel
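       As an illustration only (a hedged sketch, not a complete program), process 0 above could
     build its share of the 3 x 3 matrix as follows, assuming comm contains both processes;
     process 1 makes the analogous calls with its own arrays and m = 1:

.vb
     PetscInt    i[] = {0,1,3},j[] = {0,0,2};
     PetscScalar v[] = {1,2,3};
     Mat         B;

     MatCreate(comm,&B);
     MatSetSizes(B,2,PETSC_DECIDE,3,3);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve
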
3997 
3998 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3999           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
4000 @*/
4001 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
4002 {
4003   PetscErrorCode ierr;
4004 
4005   PetscFunctionBegin;
4006   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
4007   PetscFunctionReturn(0);
4008 }
4009 
4010 /*@C
4011    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4012    (the default parallel PETSc format).  For good matrix assembly performance
4013    the user should preallocate the matrix storage by setting the parameters
4014    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4015    performance can be increased by more than a factor of 50.
4016 
4017    Collective on MPI_Comm
4018 
4019    Input Parameters:
4020 +  B - the matrix
4021 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4022            (same value is used for all local rows)
4023 .  d_nnz - array containing the number of nonzeros in the various rows of the
4024            DIAGONAL portion of the local submatrix (possibly different for each row)
4025            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4026            The size of this array is equal to the number of local rows, i.e 'm'.
4027            For matrices that will be factored, you must leave room for (and set)
4028            the diagonal entry even if it is zero.
4029 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4030            submatrix (same value is used for all local rows).
4031 -  o_nnz - array containing the number of nonzeros in the various rows of the
4032            OFF-DIAGONAL portion of the local submatrix (possibly different for
4033            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4034            structure. The size of this array is equal to the number
4035            of local rows, i.e 'm'.
4036 
4037    If the *_nnz parameter is given then the *_nz parameter is ignored
4038 
4039    The AIJ format (also called the Yale sparse matrix format or
4040    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4041    storage.  The stored row and column indices begin with zero.
4042    See Users-Manual: ch_mat for details.
4043 
4044    The parallel matrix is partitioned such that the first m0 rows belong to
4045    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4046    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
4047 
4048    The DIAGONAL portion of the local submatrix of a processor can be defined
4049    as the submatrix which is obtained by extracting the part corresponding to
4050    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4051    first row that belongs to the processor, r2 is the last row belonging to
4052    this processor, and c1-c2 is the range of indices of the local part of a
4053    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
4054    common case of a square matrix, the row and column ranges are the same and
4055    the DIAGONAL part is also square. The remaining portion of the local
4056    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4057 
4058    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4059 
4060    You can call MatGetInfo() to get information on how effective the preallocation was;
4061    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
4062    You can also run with the option -info and look for messages with the string
4063    malloc in them to see if additional memory allocation was needed.
4064 
4065    Example usage:
4066 
4067    Consider the following 8x8 matrix with 34 non-zero values, that is
4068    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4069    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4070    as follows:
4071 
4072 .vb
4073             1  2  0  |  0  3  0  |  0  4
4074     Proc0   0  5  6  |  7  0  0  |  8  0
4075             9  0 10  | 11  0  0  | 12  0
4076     -------------------------------------
4077            13  0 14  | 15 16 17  |  0  0
4078     Proc1   0 18  0  | 19 20 21  |  0  0
4079             0  0  0  | 22 23  0  | 24  0
4080     -------------------------------------
4081     Proc2  25 26 27  |  0  0 28  | 29  0
4082            30  0  0  | 31 32 33  |  0 34
4083 .ve
4084 
4085    This can be represented as a collection of submatrices as:
4086 
4087 .vb
4088       A B C
4089       D E F
4090       G H I
4091 .ve
4092 
4093    Where the submatrices A,B,C are owned by proc0, D,E,F are
4094    owned by proc1, G,H,I are owned by proc2.
4095 
4096    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4097    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4098    The 'M','N' parameters are 8,8, and have the same values on all procs.
4099 
4100    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4101    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4102    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4103    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4104    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
4105    matrix, and [DF] as another SeqAIJ matrix.
4106 
4107    When d_nz, o_nz parameters are specified, d_nz storage elements are
4108    allocated for every row of the local diagonal submatrix, and o_nz
4109    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4110    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4111    the local rows of the DIAGONAL and of the OFF-DIAGONAL submatrices, respectively.
4112    In this case, the values of d_nz,o_nz are:
4113 .vb
4114      proc0 : dnz = 2, o_nz = 2
4115      proc1 : dnz = 3, o_nz = 2
4116      proc2 : dnz = 1, o_nz = 4
4117 .ve
4118    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4119    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4120    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4121    34 values.
4122 
4123    When the d_nnz, o_nnz parameters are specified, the storage is specified
4124    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4125    In the above case the values for d_nnz,o_nnz are:
4126 .vb
4127      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4128      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4129      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4130 .ve
4131    Here the space allocated is the sum of all the above values, i.e., 34, and
4132    hence the preallocation is exact.
4133 
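   For the d_nnz/o_nnz variant above, a hedged sketch (illustration only) of the calls made on
   proc0 follows; proc1 and proc2 make the same calls with their own local sizes and arrays,
   and comm is assumed to contain all three processes:

.vb
     Mat      A;
     PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};

     MatCreate(comm,&A);
     MatSetType(A,MATMPIAIJ);
     MatSetSizes(A,3,3,8,8);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve
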
4134    Level: intermediate
4135 
4136 .keywords: matrix, aij, compressed row, sparse, parallel
4137 
4138 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4139           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4140 @*/
4141 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4142 {
4143   PetscErrorCode ierr;
4144 
4145   PetscFunctionBegin;
4146   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4147   PetscValidType(B,1);
4148   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4149   PetscFunctionReturn(0);
4150 }
4151 
4152 /*@
4153      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
4154          in standard CSR format.
4155 
4156    Collective on MPI_Comm
4157 
4158    Input Parameters:
4159 +  comm - MPI communicator
4160 .  m - number of local rows (Cannot be PETSC_DECIDE)
4161 .  n - This value should be the same as the local size used in creating the
4162        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4163        calculated if N is given). For square matrices n is almost always m.
4164 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4165 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4166 .   i - row indices
4167 .   j - column indices
4168 -   a - matrix values
4169 
4170    Output Parameter:
4171 .   mat - the matrix
4172 
4173    Level: intermediate
4174 
4175    Notes:
4176        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4177      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4178      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4179 
4180        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
4181 
4182        The format used for the sparse matrix input is equivalent to a
4183     row-major ordering, i.e., for the following matrix, the input data expected is
4184     as shown:
4185 
4186 $        1 0 0
4187 $        2 0 3     P0
4188 $       -------
4189 $        4 5 6     P1
4190 $
4191 $     Process0 [P0]: rows_owned=[0,1]
4192 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4193 $        j =  {0,0,2}  [size = 3]
4194 $        v =  {1,2,3}  [size = 3]
4195 $
4196 $     Process1 [P1]: rows_owned=[2]
4197 $        i =  {0,3}    [size = nrow+1  = 1+1]
4198 $        j =  {0,1,2}  [size = 3]
4199 $        v =  {4,5,6}  [size = 3]
4200 
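       As an illustration only, a hedged sketch of the single call process 0 above might make;
     process 1 passes its own arrays with m = 1, and comm is assumed to contain both processes:

$     PetscInt    i[] = {0,1,3},j[] = {0,0,2};
$     PetscScalar v[] = {1,2,3};
$     Mat         A;
$
$     MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
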
4201 .keywords: matrix, aij, compressed row, sparse, parallel
4202 
4203 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4204           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4205 @*/
4206 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4207 {
4208   PetscErrorCode ierr;
4209 
4210   PetscFunctionBegin;
4211   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4212   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4213   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4214   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4215   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4216   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4217   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4218   PetscFunctionReturn(0);
4219 }
4220 
4221 /*@C
4222    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4223    (the default parallel PETSc format).  For good matrix assembly performance
4224    the user should preallocate the matrix storage by setting the parameters
4225    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4226    performance can be increased by more than a factor of 50.
4227 
4228    Collective on MPI_Comm
4229 
4230    Input Parameters:
4231 +  comm - MPI communicator
4232 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
4233            This value should be the same as the local size used in creating the
4234            y vector for the matrix-vector product y = Ax.
4235 .  n - This value should be the same as the local size used in creating the
4236        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4237        calculated if N is given). For square matrices n is almost always m.
4238 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4239 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4240 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4241            (same value is used for all local rows)
4242 .  d_nnz - array containing the number of nonzeros in the various rows of the
4243            DIAGONAL portion of the local submatrix (possibly different for each row)
4244            or NULL, if d_nz is used to specify the nonzero structure.
4245            The size of this array is equal to the number of local rows, i.e 'm'.
4246 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4247            submatrix (same value is used for all local rows).
4248 -  o_nnz - array containing the number of nonzeros in the various rows of the
4249            OFF-DIAGONAL portion of the local submatrix (possibly different for
4250            each row) or NULL, if o_nz is used to specify the nonzero
4251            structure. The size of this array is equal to the number
4252            of local rows, i.e 'm'.
4253 
4254    Output Parameter:
4255 .  A - the matrix
4256 
4257    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4258    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4259    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4260 
4261    Notes:
4262    If the *_nnz parameter is given then the *_nz parameter is ignored
4263 
4264    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4265    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4266    storage requirements for this matrix.
4267 
4268    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4269    processor then it must be used on all processors that share the object for
4270    that argument.
4271 
4272    The user MUST specify either the local or global matrix dimensions
4273    (possibly both).
4274 
4275    The parallel matrix is partitioned across processors such that the
4276    first m0 rows belong to process 0, the next m1 rows belong to
4277    process 1, the next m2 rows belong to process 2, etc., where
4278    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4279    values corresponding to an [m x N] submatrix.
4280 
4281    The columns are logically partitioned with the n0 columns belonging
4282    to the 0th partition, the next n1 columns belonging to the next
4283    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4284 
4285    The DIAGONAL portion of the local submatrix on any given processor
4286    is the submatrix corresponding to the rows and columns m,n
4287    owned by the given processor, i.e., the diagonal submatrix on
4288    process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4289    etc. The remaining portion of the local submatrix [m x (N-n)]
4290    constitutes the OFF-DIAGONAL portion. The example below better
4291    illustrates this concept.
4292 
4293    For a square global matrix we define each processor's diagonal portion
4294    to be its local rows and the corresponding columns (a square submatrix);
4295    each processor's off-diagonal portion encompasses the remainder of the
4296    local matrix (a rectangular submatrix).
4297 
4298    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4299 
4300    When calling this routine with a single process communicator, a matrix of
4301    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4302    type of communicator, use the construction mechanism
4307 $     MatCreate(...,&A);
4308 $     MatSetType(A,MATMPIAIJ);
4309 $     MatSetSizes(A, m,n,M,N);
4310 $     MatMPIAIJSetPreallocation(A,...);
4311 
4312    By default, this format uses inodes (identical nodes) when possible.
4313    We search for consecutive rows with the same nonzero structure, thereby
4314    reusing matrix information to achieve increased efficiency.
4315 
4316    Options Database Keys:
4317 +  -mat_no_inode  - Do not use inodes
4318 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4319 
4322    Example usage:
4323 
4324    Consider the following 8x8 matrix with 34 non-zero values, that is
4325    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4326    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4327    as follows
4328 
4329 .vb
4330             1  2  0  |  0  3  0  |  0  4
4331     Proc0   0  5  6  |  7  0  0  |  8  0
4332             9  0 10  | 11  0  0  | 12  0
4333     -------------------------------------
4334            13  0 14  | 15 16 17  |  0  0
4335     Proc1   0 18  0  | 19 20 21  |  0  0
4336             0  0  0  | 22 23  0  | 24  0
4337     -------------------------------------
4338     Proc2  25 26 27  |  0  0 28  | 29  0
4339            30  0  0  | 31 32 33  |  0 34
4340 .ve
4341 
4342    This can be represented as a collection of submatrices as
4343 
4344 .vb
4345       A B C
4346       D E F
4347       G H I
4348 .ve
4349 
4350    Where the submatrices A,B,C are owned by proc0, D,E,F are
4351    owned by proc1, G,H,I are owned by proc2.
4352 
4353    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4354    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4355    The 'M','N' parameters are 8,8, and have the same values on all procs.
4356 
4357    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4358    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4359    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4360    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4361    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
4362    matrix, and [DF] as another SeqAIJ matrix.
4363 
4364    When d_nz, o_nz parameters are specified, d_nz storage elements are
4365    allocated for every row of the local diagonal submatrix, and o_nz
4366    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4367    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4368    the local rows of the DIAGONAL and of the OFF-DIAGONAL submatrices, respectively.
4369    In this case, the values of d_nz,o_nz are
4370 .vb
4371      proc0 : dnz = 2, o_nz = 2
4372      proc1 : dnz = 3, o_nz = 2
4373      proc2 : dnz = 1, o_nz = 4
4374 .ve
4375    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4376    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4377    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4378    34 values.
4379 
4380    When the d_nnz, o_nnz parameters are specified, the storage is specified
4381    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4382    In the above case the values for d_nnz,o_nnz are
4383 .vb
4384      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4385      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4386      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4387 .ve
4388    Here the space allocated is the sum of all the above values, i.e., 34, and
4389    hence the preallocation is exact.
4390 
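   For the example above, a hedged sketch (illustration only) of the call made on proc0; the
   other processes pass their own m, n, d_nnz and o_nnz values:

.vb
     Mat      A;
     PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};

     MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
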
4391    Level: intermediate
4392 
4393 .keywords: matrix, aij, compressed row, sparse, parallel
4394 
4395 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4396           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4397 @*/
4398 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4399 {
4400   PetscErrorCode ierr;
4401   PetscMPIInt    size;
4402 
4403   PetscFunctionBegin;
4404   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4405   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4406   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4407   if (size > 1) {
4408     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4409     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4410   } else {
4411     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4412     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4413   }
4414   PetscFunctionReturn(0);
4415 }
4416 
4417 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4418 {
4419   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4420   PetscBool      flg;
4421   PetscErrorCode ierr;
4422 
4423   PetscFunctionBegin;
4424   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
4425   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4426   if (Ad)     *Ad     = a->A;
4427   if (Ao)     *Ao     = a->B;
4428   if (colmap) *colmap = a->garray;
4429   PetscFunctionReturn(0);
4430 }
4431 
4432 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4433 {
4434   PetscErrorCode ierr;
4435   PetscInt       m,N,i,rstart,nnz,Ii;
4436   PetscInt       *indx;
4437   PetscScalar    *values;
4438 
4439   PetscFunctionBegin;
4440   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4441   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4442     PetscInt       *dnz,*onz,sum,bs,cbs;
4443 
4444     if (n == PETSC_DECIDE) {
4445       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4446     }
4447     /* Check sum(n) = N */
4448     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4449     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4450 
4451     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4452     rstart -= m;
4453 
4454     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4455     for (i=0; i<m; i++) {
4456       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4457       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4458       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4459     }
4460 
4461     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4462     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4463     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4464     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4465     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4466     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4467     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4468     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4469   }
4470 
4471   /* numeric phase */
4472   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4473   for (i=0; i<m; i++) {
4474     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4475     Ii   = i + rstart;
4476     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4477     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4478   }
4479   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4480   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4481   PetscFunctionReturn(0);
4482 }
4483 
4484 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4485 {
4486   PetscErrorCode    ierr;
4487   PetscMPIInt       rank;
4488   PetscInt          m,N,i,rstart,nnz;
4489   size_t            len;
4490   const PetscInt    *indx;
4491   PetscViewer       out;
4492   char              *name;
4493   Mat               B;
4494   const PetscScalar *values;
4495 
4496   PetscFunctionBegin;
4497   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4498   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4499   /* Should this be the type of the diagonal block of A? */
4500   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4501   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4502   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4503   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4504   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4505   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4506   for (i=0; i<m; i++) {
4507     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4508     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4509     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4510   }
4511   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4512   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4513 
4514   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4515   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4516   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4517   sprintf(name,"%s.%d",outfile,rank);
4518   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4519   ierr = PetscFree(name);CHKERRQ(ierr);
4520   ierr = MatView(B,out);CHKERRQ(ierr);
4521   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4522   ierr = MatDestroy(&B);CHKERRQ(ierr);
4523   PetscFunctionReturn(0);
4524 }
4525 
4526 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4527 {
4528   PetscErrorCode      ierr;
4529   Mat_Merge_SeqsToMPI *merge;
4530   PetscContainer      container;
4531 
4532   PetscFunctionBegin;
4533   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4534   if (container) {
4535     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4536     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4537     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4538     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4539     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4540     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4541     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4542     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4543     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4544     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4545     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4546     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4547     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4548     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4549     ierr = PetscFree(merge);CHKERRQ(ierr);
4550     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4551   }
4552   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4553   PetscFunctionReturn(0);
4554 }
4555 
4556 #include <../src/mat/utils/freespace.h>
4557 #include <petscbt.h>
4558 
4559 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4560 {
4561   PetscErrorCode      ierr;
4562   MPI_Comm            comm;
4563   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4564   PetscMPIInt         size,rank,taga,*len_s;
4565   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4566   PetscInt            proc,m;
4567   PetscInt            **buf_ri,**buf_rj;
4568   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4569   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4570   MPI_Request         *s_waits,*r_waits;
4571   MPI_Status          *status;
4572   MatScalar           *aa=a->a;
4573   MatScalar           **abuf_r,*ba_i;
4574   Mat_Merge_SeqsToMPI *merge;
4575   PetscContainer      container;
4576 
4577   PetscFunctionBegin;
4578   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4579   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4580 
4581   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4582   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4583 
4584   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4585   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4586 
4587   bi     = merge->bi;
4588   bj     = merge->bj;
4589   buf_ri = merge->buf_ri;
4590   buf_rj = merge->buf_rj;
4591 
4592   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4593   owners = merge->rowmap->range;
4594   len_s  = merge->len_s;
4595 
4596   /* send and recv matrix values */
4597   /*-----------------------------*/
4598   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4599   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4600 
4601   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4602   for (proc=0,k=0; proc<size; proc++) {
4603     if (!len_s[proc]) continue;
4604     i    = owners[proc];
4605     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4606     k++;
4607   }
4608 
4609   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4610   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4611   ierr = PetscFree(status);CHKERRQ(ierr);
4612 
4613   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4614   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4615 
4616   /* insert mat values of mpimat */
4617   /*----------------------------*/
4618   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4619   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4620 
4621   for (k=0; k<merge->nrecv; k++) {
4622     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4623     nrows       = *(buf_ri_k[k]);
4624     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4625     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4626   }
4627 
4628   /* set values of ba */
4629   m = merge->rowmap->n;
4630   for (i=0; i<m; i++) {
4631     arow = owners[rank] + i;
4632     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4633     bnzi = bi[i+1] - bi[i];
4634     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4635 
4636     /* add local non-zero vals of this proc's seqmat into ba */
4637     anzi   = ai[arow+1] - ai[arow];
4638     aj     = a->j + ai[arow];
4639     aa     = a->a + ai[arow];
4640     nextaj = 0;
4641     for (j=0; nextaj<anzi; j++) {
4642       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4643         ba_i[j] += aa[nextaj++];
4644       }
4645     }
4646 
4647     /* add received vals into ba */
4648     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4649       /* i-th row */
4650       if (i == *nextrow[k]) {
4651         anzi   = *(nextai[k]+1) - *nextai[k];
4652         aj     = buf_rj[k] + *(nextai[k]);
4653         aa     = abuf_r[k] + *(nextai[k]);
4654         nextaj = 0;
4655         for (j=0; nextaj<anzi; j++) {
4656           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4657             ba_i[j] += aa[nextaj++];
4658           }
4659         }
4660         nextrow[k]++; nextai[k]++;
4661       }
4662     }
4663     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4664   }
4665   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4666   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4667 
4668   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4669   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4670   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4671   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4672   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4673   PetscFunctionReturn(0);
4674 }
4675 
4676 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4677 {
4678   PetscErrorCode      ierr;
4679   Mat                 B_mpi;
4680   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4681   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4682   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4683   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4684   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4685   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4686   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4687   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4688   MPI_Status          *status;
4689   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4690   PetscBT             lnkbt;
4691   Mat_Merge_SeqsToMPI *merge;
4692   PetscContainer      container;
4693 
4694   PetscFunctionBegin;
4695   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4696 
4697   /* make sure it is a PETSc comm */
4698   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4699   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4700   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4701 
4702   ierr = PetscNew(&merge);CHKERRQ(ierr);
4703   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4704 
4705   /* determine row ownership */
4706   /*---------------------------------------------------------*/
4707   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4708   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4709   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4710   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4711   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4712   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4713   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4714 
4715   m      = merge->rowmap->n;
4716   owners = merge->rowmap->range;
4717 
4718   /* determine the number of messages to send, their lengths */
4719   /*---------------------------------------------------------*/
4720   len_s = merge->len_s;
4721 
4722   len          = 0; /* length of buf_si[] */
4723   merge->nsend = 0;
4724   for (proc=0; proc<size; proc++) {
4725     len_si[proc] = 0;
4726     if (proc == rank) {
4727       len_s[proc] = 0;
4728     } else {
4729       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4730       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4731     }
4732     if (len_s[proc]) {
4733       merge->nsend++;
4734       nrows = 0;
4735       for (i=owners[proc]; i<owners[proc+1]; i++) {
4736         if (ai[i+1] > ai[i]) nrows++;
4737       }
4738       len_si[proc] = 2*(nrows+1);
4739       len         += len_si[proc];
4740     }
4741   }
4742 
4743   /* determine the number and length of messages to receive for ij-structure */
4744   /*-------------------------------------------------------------------------*/
4745   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4746   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4747 
4748   /* post the Irecv of j-structure */
4749   /*-------------------------------*/
4750   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4751   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4752 
4753   /* post the Isend of j-structure */
4754   /*--------------------------------*/
4755   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4756 
4757   for (proc=0, k=0; proc<size; proc++) {
4758     if (!len_s[proc]) continue;
4759     i    = owners[proc];
4760     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4761     k++;
4762   }
4763 
4764   /* receives and sends of j-structure are complete */
4765   /*------------------------------------------------*/
4766   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4767   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4768 
4769   /* send and recv i-structure */
4770   /*---------------------------*/
4771   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4772   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4773 
4774   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4775   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4776   for (proc=0,k=0; proc<size; proc++) {
4777     if (!len_s[proc]) continue;
4778     /* form outgoing message for i-structure:
4779          buf_si[0]:                 nrows to be sent
4780                [1:nrows]:           row index (global)
4781                [nrows+1:2*nrows+1]: i-structure index
4782     */
4783     /*-------------------------------------------*/
4784     nrows       = len_si[proc]/2 - 1;
4785     buf_si_i    = buf_si + nrows+1;
4786     buf_si[0]   = nrows;
4787     buf_si_i[0] = 0;
4788     nrows       = 0;
4789     for (i=owners[proc]; i<owners[proc+1]; i++) {
4790       anzi = ai[i+1] - ai[i];
4791       if (anzi) {
4792         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4793         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4794         nrows++;
4795       }
4796     }
4797     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4798     k++;
4799     buf_si += len_si[proc];
4800   }
4801 
4802   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4803   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4804 
4805   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4806   for (i=0; i<merge->nrecv; i++) {
4807     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4808   }
4809 
4810   ierr = PetscFree(len_si);CHKERRQ(ierr);
4811   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4812   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4813   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4814   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4815   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4816   ierr = PetscFree(status);CHKERRQ(ierr);
4817 
4818   /* compute a local seq matrix in each processor */
4819   /*----------------------------------------------*/
4820   /* allocate bi array and free space for accumulating nonzero column info */
4821   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4822   bi[0] = 0;
4823 
4824   /* create and initialize a linked list */
4825   nlnk = N+1;
4826   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4827 
4828   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4829   len  = ai[owners[rank+1]] - ai[owners[rank]];
4830   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4831 
4832   current_space = free_space;
4833 
4834   /* determine symbolic info for each local row */
4835   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4836 
4837   for (k=0; k<merge->nrecv; k++) {
4838     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4839     nrows       = *buf_ri_k[k];
4840     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4841     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4842   }
4843 
4844   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4845   len  = 0;
4846   for (i=0; i<m; i++) {
4847     bnzi = 0;
4848     /* add local non-zero cols of this proc's seqmat into lnk */
4849     arow  = owners[rank] + i;
4850     anzi  = ai[arow+1] - ai[arow];
4851     aj    = a->j + ai[arow];
4852     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4853     bnzi += nlnk;
4854     /* add received col data into lnk */
4855     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4856       if (i == *nextrow[k]) { /* i-th row */
4857         anzi  = *(nextai[k]+1) - *nextai[k];
4858         aj    = buf_rj[k] + *nextai[k];
4859         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4860         bnzi += nlnk;
4861         nextrow[k]++; nextai[k]++;
4862       }
4863     }
4864     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4865 
4866     /* if free space is not available, make more free space */
4867     if (current_space->local_remaining<bnzi) {
4868       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4869       nspacedouble++;
4870     }
4871     /* copy data into free space, then initialize lnk */
4872     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4873     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4874 
4875     current_space->array           += bnzi;
4876     current_space->local_used      += bnzi;
4877     current_space->local_remaining -= bnzi;
4878 
4879     bi[i+1] = bi[i] + bnzi;
4880   }
4881 
4882   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4883 
4884   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4885   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4886   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4887 
4888   /* create symbolic parallel matrix B_mpi */
4889   /*---------------------------------------*/
4890   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4891   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4892   if (n==PETSC_DECIDE) {
4893     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4894   } else {
4895     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4896   }
4897   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4898   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4899   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4900   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4901   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4902 
4903   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4904   B_mpi->assembled    = PETSC_FALSE;
4905   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4906   merge->bi           = bi;
4907   merge->bj           = bj;
4908   merge->buf_ri       = buf_ri;
4909   merge->buf_rj       = buf_rj;
4910   merge->coi          = NULL;
4911   merge->coj          = NULL;
4912   merge->owners_co    = NULL;
4913 
4914   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4915 
4916   /* attach the supporting struct to B_mpi for reuse */
4917   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4918   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4919   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4920   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4921   *mpimat = B_mpi;
4922 
4923   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4924   PetscFunctionReturn(0);
4925 }
4926 
4927 /*@C
4928       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4929                  matrices from each processor
4930 
4931     Collective on MPI_Comm
4932 
4933    Input Parameters:
4934 +    comm - the communicator the parallel matrix will live on
4935 .    seqmat - the input sequential matrix
4936 .    m - number of local rows (or PETSC_DECIDE)
4937 .    n - number of local columns (or PETSC_DECIDE)
4938 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4939 
4940    Output Parameter:
4941 .    mpimat - the parallel matrix generated
4942 
4943     Level: advanced
4944 
4945    Notes:
4946      The dimensions of the sequential matrix on each process MUST be the same.
4947      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4948      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
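
     A typical call sequence (an illustrative sketch only, assuming each process has assembled
     its own seqmat of identical size and mpimat is a Mat declared by the caller) is
.vb
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
     // values of seqmat may then be changed (same nonzero pattern) and the sum recomputed with
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);
.ve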
4949 @*/
4950 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4951 {
4952   PetscErrorCode ierr;
4953   PetscMPIInt    size;
4954 
4955   PetscFunctionBegin;
4956   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4957   if (size == 1) {
4958     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4959     if (scall == MAT_INITIAL_MATRIX) {
4960       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4961     } else {
4962       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4963     }
4964     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4965     PetscFunctionReturn(0);
4966   }
4967   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4968   if (scall == MAT_INITIAL_MATRIX) {
4969     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4970   }
4971   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4972   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4973   PetscFunctionReturn(0);
4974 }
4975 
4976 /*@
4977      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4978           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4979           with MatGetSize().
4980 
4981     Not Collective
4982 
4983    Input Parameters:
4984 +    A - the matrix
4985 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4986 
4987    Output Parameter:
4988 .    A_loc - the local sequential matrix generated
4989 
4990     Level: developer
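   Notes:
     A minimal usage sketch (assuming A is an assembled MATMPIAIJ matrix) might be
.vb
     Mat A_loc;

     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     // ... use the mlocal x N sequential matrix A_loc ...
     MatDestroy(&A_loc);
.ve
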
4991 
4992 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
4993 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4994 @*/
4995 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4996 {
4997   PetscErrorCode ierr;
4998   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4999   Mat_SeqAIJ     *mat,*a,*b;
5000   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5001   MatScalar      *aa,*ba,*cam;
5002   PetscScalar    *ca;
5003   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5004   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5005   PetscBool      match;
5006   MPI_Comm       comm;
5007   PetscMPIInt    size;
5008 
5009   PetscFunctionBegin;
5010   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5011   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5012   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5013   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5014   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
5015 
5016   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5017   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5018   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5019   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5020   aa = a->a; ba = b->a;
5021   if (scall == MAT_INITIAL_MATRIX) {
5022     if (size == 1) {
5023       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
5024       PetscFunctionReturn(0);
5025     }
5026 
5027     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5028     ci[0] = 0;
5029     for (i=0; i<am; i++) {
5030       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5031     }
5032     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5033     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5034     k    = 0;
5035     for (i=0; i<am; i++) {
5036       ncols_o = bi[i+1] - bi[i];
5037       ncols_d = ai[i+1] - ai[i];
5038       /* off-diagonal portion of A */
5039       for (jo=0; jo<ncols_o; jo++) {
5040         col = cmap[*bj];
5041         if (col >= cstart) break;
5042         cj[k]   = col; bj++;
5043         ca[k++] = *ba++;
5044       }
5045       /* diagonal portion of A */
5046       for (j=0; j<ncols_d; j++) {
5047         cj[k]   = cstart + *aj++;
5048         ca[k++] = *aa++;
5049       }
      /* off-diagonal portion of A: columns after the diagonal block */
5051       for (j=jo; j<ncols_o; j++) {
5052         cj[k]   = cmap[*bj++];
5053         ca[k++] = *ba++;
5054       }
5055     }
5056     /* put together the new matrix */
5057     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5058     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5059     /* Since these are PETSc arrays, change flags to free them as necessary. */
5060     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5061     mat->free_a  = PETSC_TRUE;
5062     mat->free_ij = PETSC_TRUE;
5063     mat->nonew   = 0;
5064   } else if (scall == MAT_REUSE_MATRIX) {
5065     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5066     ci = mat->i; cj = mat->j; cam = mat->a;
5067     for (i=0; i<am; i++) {
      /* off-diagonal portion of A: columns before the diagonal block */
5069       ncols_o = bi[i+1] - bi[i];
5070       for (jo=0; jo<ncols_o; jo++) {
5071         col = cmap[*bj];
5072         if (col >= cstart) break;
5073         *cam++ = *ba++; bj++;
5074       }
5075       /* diagonal portion of A */
5076       ncols_d = ai[i+1] - ai[i];
5077       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A: columns after the diagonal block */
5079       for (j=jo; j<ncols_o; j++) {
5080         *cam++ = *ba++; bj++;
5081       }
5082     }
5083   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5084   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5085   PetscFunctionReturn(0);
5086 }
5087 
5088 /*@C
     MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5090 
5091     Not Collective
5092 
5093    Input Parameters:
5094 +    A - the matrix
5095 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5096 -    row, col - index sets of rows and columns to extract (or NULL)
5097 
5098    Output Parameter:
5099 .    A_loc - the local sequential matrix generated
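
   Example usage (a minimal sketch; passing NULL for row and col selects all local rows and all nonzero columns):
.vb
     Mat A_cond;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_cond);CHKERRQ(ierr);
     .... use A_cond, the local rows of A restricted to its nonzero columns ....
     ierr = MatDestroy(&A_cond);CHKERRQ(ierr);
.ve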
5100 
5101     Level: developer
5102 
5103 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5104 
5105 @*/
5106 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5107 {
5108   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5109   PetscErrorCode ierr;
5110   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5111   IS             isrowa,iscola;
5112   Mat            *aloc;
5113   PetscBool      match;
5114 
5115   PetscFunctionBegin;
5116   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5117   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5118   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5119   if (!row) {
5120     start = A->rmap->rstart; end = A->rmap->rend;
5121     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5122   } else {
5123     isrowa = *row;
5124   }
5125   if (!col) {
5126     start = A->cmap->rstart;
5127     cmap  = a->garray;
5128     nzA   = a->A->cmap->n;
5129     nzB   = a->B->cmap->n;
5130     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5131     ncols = 0;
5132     for (i=0; i<nzB; i++) {
5133       if (cmap[i] < start) idx[ncols++] = cmap[i];
5134       else break;
5135     }
5136     imark = i;
5137     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5138     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5139     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5140   } else {
5141     iscola = *col;
5142   }
5143   if (scall != MAT_INITIAL_MATRIX) {
5144     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5145     aloc[0] = *A_loc;
5146   }
5147   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5148   if (!col) { /* attach global id of condensed columns */
5149     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5150   }
5151   *A_loc = aloc[0];
5152   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5153   if (!row) {
5154     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5155   }
5156   if (!col) {
5157     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5158   }
5159   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5160   PetscFunctionReturn(0);
5161 }
5162 
5163 /*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
5165 
5166     Collective on Mat
5167 
5168    Input Parameters:
5169 +    A,B - the matrices in mpiaij format
5170 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5171 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5172 
   Output Parameters:
5174 +    rowb, colb - index sets of rows and columns of B to extract
5175 -    B_seq - the sequential matrix generated
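
   Example usage (a minimal sketch; rowb and colb are created on the first call and must be passed back unchanged for reuse):
.vb
     IS  rowb = NULL,colb = NULL;
     Mat B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     .... after the values of B change, refresh B_seq ....
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
.ve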
5176 
5177     Level: developer
5178 
5179 @*/
5180 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5181 {
5182   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5183   PetscErrorCode ierr;
5184   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5185   IS             isrowb,iscolb;
5186   Mat            *bseq=NULL;
5187 
5188   PetscFunctionBegin;
5189   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5190     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5191   }
5192   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5193 
5194   if (scall == MAT_INITIAL_MATRIX) {
5195     start = A->cmap->rstart;
5196     cmap  = a->garray;
5197     nzA   = a->A->cmap->n;
5198     nzB   = a->B->cmap->n;
5199     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5200     ncols = 0;
    for (i=0; i<nzB; i++) {  /* global columns of A below the local diagonal block */
5202       if (cmap[i] < start) idx[ncols++] = cmap[i];
5203       else break;
5204     }
5205     imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* columns of the local diagonal block */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* global columns of A above the local diagonal block */
5208     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5209     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5210   } else {
5211     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5212     isrowb  = *rowb; iscolb = *colb;
5213     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5214     bseq[0] = *B_seq;
5215   }
5216   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5217   *B_seq = bseq[0];
5218   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5219   if (!rowb) {
5220     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5221   } else {
5222     *rowb = isrowb;
5223   }
5224   if (!colb) {
5225     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5226   } else {
5227     *colb = iscolb;
5228   }
5229   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5230   PetscFunctionReturn(0);
5231 }
5232 
5233 /*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
    of the OFF-DIAGONAL portion of the local part of A
5236 
5237     Collective on Mat
5238 
5239    Input Parameters:
5240 +    A,B - the matrices in mpiaij format
5241 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5242 
   Output Parameters:
5244 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5245 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5246 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5247 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
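
   Typical usage (a minimal sketch, as in the MatMatMult AIJ kernels; the buffers returned by the MAT_INITIAL_MATRIX
   call are passed back unchanged for MAT_REUSE_MATRIX and are eventually freed by the caller):

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat       B_oth;
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ....
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);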
5248 
5249     Level: developer
5250 
5251 */
5252 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5253 {
5254   VecScatter_MPI_General *gen_to,*gen_from;
5255   PetscErrorCode         ierr;
5256   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5257   Mat_SeqAIJ             *b_oth;
5258   VecScatter             ctx;
5259   MPI_Comm               comm;
5260   PetscMPIInt            *rprocs,*sprocs,tag,rank;
5261   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5262   PetscInt               *rvalues,*svalues,*cols,sbs,rbs;
  PetscScalar            *b_otha,*bufa,*bufA,*vals;
5264   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5265   MPI_Request            *rwaits = NULL,*swaits = NULL;
5266   MPI_Status             *sstatus,rstatus;
5267   PetscMPIInt            jj,size;
5268   VecScatterType         type;
5269   PetscBool              mpi1;
5270 
5271   PetscFunctionBegin;
5272   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5273   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5274 
5275   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5277   }
5278   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5279   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5280 
5281   if (size == 1) {
    if (startsj_s) *startsj_s = NULL;
    if (startsj_r) *startsj_r = NULL;
    if (bufa_ptr)  *bufa_ptr  = NULL;
    *B_oth = NULL;
5285     PetscFunctionReturn(0);
5286   }
5287 
5288   ctx = a->Mvctx;
5289   ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr);
5290   ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr);
5291   if (!mpi1) {
    /* a->Mvctx is not of type MPI1; the Mat-Mat ops below are only implemented for MPI1 scatters,
       so create a->Mvctx_mpi1 */
5294     if (!a->Mvctx_mpi1) {
5295       a->Mvctx_mpi1_flg = PETSC_TRUE;
5296       ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr);
5297     }
5298     ctx = a->Mvctx_mpi1;
5299   }
5300   tag = ((PetscObject)ctx)->tag;
5301 
5302   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5303   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5304   nrecvs   = gen_from->n;
5305   nsends   = gen_to->n;
5306 
5307   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5308   srow    = gen_to->indices;    /* local row index to be sent */
5309   sstarts = gen_to->starts;
5310   sprocs  = gen_to->procs;
5311   sstatus = gen_to->sstatus;
5312   sbs     = gen_to->bs;
5313   rstarts = gen_from->starts;
5314   rprocs  = gen_from->procs;
5315   rbs     = gen_from->bs;
5316 
5317   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5318   if (scall == MAT_INITIAL_MATRIX) {
5319     /* i-array */
5320     /*---------*/
5321     /*  post receives */
5322     ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
5323     for (i=0; i<nrecvs; i++) {
5324       rowlen = rvalues + rstarts[i]*rbs;
5325       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5326       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5327     }
5328 
5329     /* pack the outgoing message */
5330     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5331 
5332     sstartsj[0] = 0;
5333     rstartsj[0] = 0;
5334     len         = 0; /* total length of j or a array to be sent */
5335     k           = 0;
5336     ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
5337     for (i=0; i<nsends; i++) {
5338       rowlen = svalues + sstarts[i]*sbs;
5339       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5340       for (j=0; j<nrows; j++) {
5341         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5342         for (l=0; l<sbs; l++) {
5343           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5344 
5345           rowlen[j*sbs+l] = ncols;
5346 
5347           len += ncols;
5348           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5349         }
5350         k++;
5351       }
5352       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5353 
5354       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5355     }
5356     /* recvs and sends of i-array are completed */
5357     i = nrecvs;
5358     while (i--) {
5359       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5360     }
5361     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5362     ierr = PetscFree(svalues);CHKERRQ(ierr);
5363 
5364     /* allocate buffers for sending j and a arrays */
5365     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5366     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5367 
5368     /* create i-array of B_oth */
5369     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5370 
5371     b_othi[0] = 0;
5372     len       = 0; /* total length of j or a array to be received */
5373     k         = 0;
5374     for (i=0; i<nrecvs; i++) {
5375       rowlen = rvalues + rstarts[i]*rbs;
5376       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5377       for (j=0; j<nrows; j++) {
5378         b_othi[k+1] = b_othi[k] + rowlen[j];
5379         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5380         k++;
5381       }
5382       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5383     }
5384     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5385 
    /* allocate space for j and a arrays of B_oth */
5387     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5388     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5389 
5390     /* j-array */
5391     /*---------*/
5392     /*  post receives of j-array */
5393     for (i=0; i<nrecvs; i++) {
5394       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5395       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5396     }
5397 
5398     /* pack the outgoing message j-array */
5399     k = 0;
5400     for (i=0; i<nsends; i++) {
5401       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5402       bufJ  = bufj+sstartsj[i];
5403       for (j=0; j<nrows; j++) {
5404         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5405         for (ll=0; ll<sbs; ll++) {
5406           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5407           for (l=0; l<ncols; l++) {
5408             *bufJ++ = cols[l];
5409           }
5410           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5411         }
5412       }
5413       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5414     }
5415 
5416     /* recvs and sends of j-array are completed */
5417     i = nrecvs;
5418     while (i--) {
5419       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5420     }
5421     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5422   } else if (scall == MAT_REUSE_MATRIX) {
5423     sstartsj = *startsj_s;
5424     rstartsj = *startsj_r;
5425     bufa     = *bufa_ptr;
5426     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5427     b_otha   = b_oth->a;
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5429 
5430   /* a-array */
5431   /*---------*/
5432   /*  post receives of a-array */
5433   for (i=0; i<nrecvs; i++) {
5434     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5435     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5436   }
5437 
5438   /* pack the outgoing message a-array */
5439   k = 0;
5440   for (i=0; i<nsends; i++) {
5441     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5442     bufA  = bufa+sstartsj[i];
5443     for (j=0; j<nrows; j++) {
5444       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5445       for (ll=0; ll<sbs; ll++) {
5446         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5447         for (l=0; l<ncols; l++) {
5448           *bufA++ = vals[l];
5449         }
5450         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5451       }
5452     }
5453     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5454   }
5455   /* recvs and sends of a-array are completed */
5456   i = nrecvs;
5457   while (i--) {
5458     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5459   }
5460   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5461   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5462 
5463   if (scall == MAT_INITIAL_MATRIX) {
5464     /* put together the new matrix */
5465     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5466 
5467     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5468     /* Since these are PETSc arrays, change flags to free them as necessary. */
5469     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5470     b_oth->free_a  = PETSC_TRUE;
5471     b_oth->free_ij = PETSC_TRUE;
5472     b_oth->nonew   = 0;
5473 
5474     ierr = PetscFree(bufj);CHKERRQ(ierr);
5475     if (!startsj_s || !bufa_ptr) {
5476       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5477       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5478     } else {
5479       *startsj_s = sstartsj;
5480       *startsj_r = rstartsj;
5481       *bufa_ptr  = bufa;
5482     }
5483   }
5484   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5485   PetscFunctionReturn(0);
5486 }
5487 
5488 /*@C
5489   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5490 
5491   Not Collective
5492 
  Input Parameter:
5494 . A - The matrix in mpiaij format
5495 
  Output Parameters:
5497 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5498 . colmap - A map from global column index to local index into lvec
5499 - multScatter - A scatter from the argument of a matrix-vector product to lvec
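
  Example usage (a minimal sketch; the returned objects are owned by the matrix and must not be destroyed by the caller;
  colmap has type PetscTable instead of PetscInt* when PETSC_USE_CTABLE is defined):
.vb
    Vec        lvec;
    VecScatter Mvctx;
    PetscInt   *colmap;
    ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve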
5500 
5501   Level: developer
5502 
5503 @*/
5504 #if defined(PETSC_USE_CTABLE)
5505 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5506 #else
5507 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5508 #endif
5509 {
5510   Mat_MPIAIJ *a;
5511 
5512   PetscFunctionBegin;
5513   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5514   PetscValidPointer(lvec, 2);
5515   PetscValidPointer(colmap, 3);
5516   PetscValidPointer(multScatter, 4);
5517   a = (Mat_MPIAIJ*) A->data;
5518   if (lvec) *lvec = a->lvec;
5519   if (colmap) *colmap = a->colmap;
5520   if (multScatter) *multScatter = a->Mvctx;
5521   PetscFunctionReturn(0);
5522 }
5523 
5524 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5525 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5526 #if defined(PETSC_HAVE_MKL_SPARSE)
5527 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5528 #endif
5529 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5530 #if defined(PETSC_HAVE_ELEMENTAL)
5531 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5532 #endif
5533 #if defined(PETSC_HAVE_HYPRE)
5534 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5535 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5536 #endif
5537 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5538 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5539 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5540 
5541 /*
    Computes C = A*B as (B'*A')': there is no direct kernel for the MPIDense*MPIAIJ product, but the
    MPIAIJ*MPIDense product B'*A' is available, and (B'*A')' = A*B.
5543 
5544                n                       p                          p
5545         (              )       (              )         (                  )
5546       m (      A       )  *  n (       B      )   =   m (         C        )
5547         (              )       (              )         (                  )
5548 
5549 */
5550 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5551 {
5552   PetscErrorCode ierr;
5553   Mat            At,Bt,Ct;
5554 
5555   PetscFunctionBegin;
5556   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5557   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5558   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5559   ierr = MatDestroy(&At);CHKERRQ(ierr);
5560   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5561   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5562   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5563   PetscFunctionReturn(0);
5564 }
5565 
5566 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5567 {
5568   PetscErrorCode ierr;
5569   PetscInt       m=A->rmap->n,n=B->cmap->n;
5570   Mat            Cmat;
5571 
5572   PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5574   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5575   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5576   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5577   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5578   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5579   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5580   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5581 
5582   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5583 
5584   *C = Cmat;
5585   PetscFunctionReturn(0);
5586 }
5587 
5588 /* ----------------------------------------------------------------*/
5589 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5590 {
5591   PetscErrorCode ierr;
5592 
5593   PetscFunctionBegin;
5594   if (scall == MAT_INITIAL_MATRIX) {
5595     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5596     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5597     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5598   }
5599   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5600   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5601   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5602   PetscFunctionReturn(0);
5603 }
5604 
5605 /*MC
5606    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5607 
5608    Options Database Keys:
5609 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
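
   Example (a minimal sketch; M, N, dnz, and onz are illustrative placeholders):
.vb
     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,dnz,NULL,onz,NULL);CHKERRQ(ierr);
.ve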
5610 
5611   Level: beginner
5612 
5613 .seealso: MatCreateAIJ()
5614 M*/
5615 
5616 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5617 {
5618   Mat_MPIAIJ     *b;
5619   PetscErrorCode ierr;
5620   PetscMPIInt    size;
5621 
5622   PetscFunctionBegin;
5623   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5624 
5625   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5626   B->data       = (void*)b;
5627   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5628   B->assembled  = PETSC_FALSE;
5629   B->insertmode = NOT_SET_VALUES;
5630   b->size       = size;
5631 
5632   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5633 
5634   /* build cache for off array entries formed */
5635   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5636 
5637   b->donotstash  = PETSC_FALSE;
5638   b->colmap      = 0;
5639   b->garray      = 0;
5640   b->roworiented = PETSC_TRUE;
5641 
5642   /* stuff used for matrix vector multiply */
5643   b->lvec  = NULL;
5644   b->Mvctx = NULL;
5645 
5646   /* stuff for MatGetRow() */
5647   b->rowindices   = 0;
5648   b->rowvalues    = 0;
5649   b->getrowactive = PETSC_FALSE;
5650 
5651   /* flexible pointer used in CUSP/CUSPARSE classes */
5652   b->spptr = NULL;
5653 
5654   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5655   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5656   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5657   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5658   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5659   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5660   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5661   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5662   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5663 #if defined(PETSC_HAVE_MKL_SPARSE)
5664   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5665 #endif
5666   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5667   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5668 #if defined(PETSC_HAVE_ELEMENTAL)
5669   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5670 #endif
5671 #if defined(PETSC_HAVE_HYPRE)
5672   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5673 #endif
5674   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
5675   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5676   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5677   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5678   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5679 #if defined(PETSC_HAVE_HYPRE)
5680   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5681 #endif
5682   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
5683   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5684   PetscFunctionReturn(0);
5685 }
5686 
5687 /*@C
     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ (MATMPIAIJ) matrix using arrays that contain the "diagonal"
         and "off-diagonal" parts of the matrix in CSR format.
5690 
5691    Collective on MPI_Comm
5692 
5693    Input Parameters:
5694 +  comm - MPI communicator
5695 .  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
       it calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.   i - row indices for "diagonal" portion of matrix; i[0] must be 0 and i[row] = i[row-1] + number of entries in row row-1 of that portion
.   j - column indices for "diagonal" portion, which must be local (relative to the first column of the "diagonal" block)
.   a - matrix values for "diagonal" portion
.   oi - row indices for "off-diagonal" portion of matrix; same format as i
.   oj - column indices for "off-diagonal" portion, which must be global (columns of the whole parallel matrix)
-   oa - matrix values for "off-diagonal" portion
5707 
5708    Output Parameter:
5709 .   mat - the matrix
5710 
5711    Level: advanced
5712 
5713    Notes:
5714        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5715        must free the arrays once the matrix has been destroyed and not before.
5716 
5717        The i and j indices are 0 based
5718 
5719        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5720 
5721        This sets local rows and cannot be used to set off-processor values.
5722 
5723        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5724        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5725        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5726        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5727        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5728        communication if it is known that only local entries will be set.
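
   Example (a minimal sketch on 2 MPI processes for the global matrix with rows [2 -1] and [-1 2]; each process owns
   one row, oj[0] holds the global column of that row's single off-diagonal entry, and the arrays are illustrative
   and must remain valid until the matrix is destroyed):
.vb
     PetscMPIInt rank;
     PetscInt    i[]  = {0,1}, j[]  = {0}, oi[] = {0,1}, oj[1];
     PetscScalar a[]  = {2.0}, oa[] = {-1.0};
     Mat         A;
     ierr  = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
     oj[0] = rank ? 0 : 1;
     ierr  = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve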
5729 
5730 .keywords: matrix, aij, compressed row, sparse, parallel
5731 
5732 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5733           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5734 @*/
5735 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5736 {
5737   PetscErrorCode ierr;
5738   Mat_MPIAIJ     *maij;
5739 
5740   PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5742   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5743   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5744   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5745   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5746   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5747   maij = (Mat_MPIAIJ*) (*mat)->data;
5748 
5749   (*mat)->preallocated = PETSC_TRUE;
5750 
5751   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5752   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5753 
5754   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5755   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5756 
5757   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5758   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5759   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5760   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5761 
5762   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5763   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5764   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5765   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5766   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5767   PetscFunctionReturn(0);
5768 }
5769 
5770 /*
5771     Special version for direct calls from Fortran
5772 */
5773 #include <petsc/private/fortranimpl.h>
5774 
/* Change these macros so they can be used in a void function */
5776 #undef CHKERRQ
5777 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5778 #undef SETERRQ2
5779 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5780 #undef SETERRQ3
5781 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5782 #undef SETERRQ
5783 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5784 
5785 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5786 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5787 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5788 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5789 #else
5790 #endif
5791 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5792 {
5793   Mat            mat  = *mmat;
5794   PetscInt       m    = *mm, n = *mn;
5795   InsertMode     addv = *maddv;
5796   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5797   PetscScalar    value;
5798   PetscErrorCode ierr;
5799 
5800   MatCheckPreallocated(mat,1);
5801   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5802 
5803 #if defined(PETSC_USE_DEBUG)
5804   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5805 #endif
5806   {
5807     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5808     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5809     PetscBool roworiented = aij->roworiented;
5810 
5811     /* Some Variables required in the macro */
5812     Mat        A                 = aij->A;
5813     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5814     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5815     MatScalar  *aa               = a->a;
5816     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5817     Mat        B                 = aij->B;
5818     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5819     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5820     MatScalar  *ba               = b->a;
5821 
5822     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5823     PetscInt  nonew = a->nonew;
5824     MatScalar *ap1,*ap2;
5825 
5826     PetscFunctionBegin;
5827     for (i=0; i<m; i++) {
5828       if (im[i] < 0) continue;
5829 #if defined(PETSC_USE_DEBUG)
5830       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5831 #endif
5832       if (im[i] >= rstart && im[i] < rend) {
5833         row      = im[i] - rstart;
5834         lastcol1 = -1;
5835         rp1      = aj + ai[row];
5836         ap1      = aa + ai[row];
5837         rmax1    = aimax[row];
5838         nrow1    = ailen[row];
5839         low1     = 0;
5840         high1    = nrow1;
5841         lastcol2 = -1;
5842         rp2      = bj + bi[row];
5843         ap2      = ba + bi[row];
5844         rmax2    = bimax[row];
5845         nrow2    = bilen[row];
5846         low2     = 0;
5847         high2    = nrow2;
5848 
5849         for (j=0; j<n; j++) {
5850           if (roworiented) value = v[i*n+j];
5851           else value = v[i+j*m];
5852           if (in[j] >= cstart && in[j] < cend) {
5853             col = in[j] - cstart;
5854             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5855             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5856           } else if (in[j] < 0) continue;
5857 #if defined(PETSC_USE_DEBUG)
5858           /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
5859           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5860 #endif
5861           else {
5862             if (mat->was_assembled) {
5863               if (!aij->colmap) {
5864                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5865               }
5866 #if defined(PETSC_USE_CTABLE)
5867               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5868               col--;
5869 #else
5870               col = aij->colmap[in[j]] - 1;
5871 #endif
5872               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5873               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5874                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5875                 col  =  in[j];
5876                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5877                 B     = aij->B;
5878                 b     = (Mat_SeqAIJ*)B->data;
5879                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5880                 rp2   = bj + bi[row];
5881                 ap2   = ba + bi[row];
5882                 rmax2 = bimax[row];
5883                 nrow2 = bilen[row];
5884                 low2  = 0;
5885                 high2 = nrow2;
5886                 bm    = aij->B->rmap->n;
5887                 ba    = b->a;
5888               }
5889             } else col = in[j];
5890             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5891           }
5892         }
5893       } else if (!aij->donotstash) {
5894         if (roworiented) {
5895           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5896         } else {
5897           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5898         }
5899       }
5900     }
5901   }
5902   PetscFunctionReturnVoid();
5903 }
5904