

#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes:
    Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also
    automatically switches over to use inodes when enough exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/
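
/*
   Example (a minimal usage sketch, not compiled as part of this file): creating
   an AIJ matrix and calling both preallocation routines as recommended above,
   so the same code runs unchanged on one or many MPI processes.  The global
   size 100 and the per-row nonzero counts (5 diagonal, 2 off-diagonal) are
   illustrative only.

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/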

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
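
/*
   Example (sketch): the CRL variant can also be selected at run time; any code
   path that calls MatSetFromOptions() on the matrix honors

     -mat_type aijcrl

   which is equivalent to calling MatSetType(A,MATAIJCRL) before assembly.
*/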

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
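
/*
   Example (sketch): calling the public interface that dispatches to the
   routine above.  MatFindNonzeroRows() returns a NULL IS when every row is
   kept, so the result must be checked before use.

     IS keptrows;

     ierr = MatFindNonzeroRows(A,&keptrows);CHKERRQ(ierr);
     if (keptrows) {
       ierr = ISView(keptrows,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
       ierr = ISDestroy(&keptrows);CHKERRQ(ierr);
     }
*/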

PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode    ierr;
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
  PetscBool         cong;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
  if (Y->assembled && cong) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
  IS              sis,gis;
  PetscErrorCode  ierr;
  const PetscInt  *isis,*igis;
  PetscInt        n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processes */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
a slightly higher hash table cost; without it, it is not scalable (each process
has an order-N integer array) but access is fast.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
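
/*
   Example (sketch): how the colmap built above is consulted elsewhere in this
   file (see MatSetValues_MPIAIJ and MatGetValues_MPIAIJ).  Indices are stored
   shifted by one so that zero can serve as the "not present" sentinel; a
   negative result after shifting back means the global column gcol has no
   local counterpart in the off-diagonal block.

     PetscInt lcol;

   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/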

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value;   \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    }  \
    if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col;  \
    ap1[_i] = value;  \
    A->nonzerostate++;\
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col;                                       \
    while (high2-low2 > 5) {                              \
      t = (low2+high2)/2;                                 \
      if (rp2[t] > col) high2 = t;                        \
      else             low2  = t;                         \
    }                                                     \
    for (_i=low2; _i<high2; _i++) {                       \
      if (rp2[_i] > col) break;                           \
      if (rp2[_i] == col) {                               \
        if (addv == ADD_VALUES) ap2[_i] += value;         \
        else                    ap2[_i] = value;          \
        goto b_noinsert;                                  \
      }                                                   \
    }                                                     \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;                    \
    /* shift up all the later entries in this row */      \
    for (ii=N; ii>=_i; ii--) {                            \
      rp2[ii+1] = rp2[ii];                                \
      ap2[ii+1] = ap2[ii];                                \
    }                                                     \
    rp2[_i] = col;                                        \
    ap2[_i] = value;                                      \
    B->nonzerostate++;                                    \
    b_noinsert: ;                                         \
    bilen[row] = nrow2;                                   \
  }
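
/*
   Both macros above use the same search strategy: bisect the sorted column
   list of the row while the remaining gap is larger than five entries, finish
   with a linear scan, and on a miss shift the tail of the row up by one slot
   to make room.  (The real macros additionally cache the last inserted column,
   so runs of increasing columns skip most of the search, and they reallocate
   the row when it is full.)  A standalone, simplified sketch of that pattern
   for ADD_VALUES, with hypothetical names:

     static void InsertIntoSortedRow(PetscInt *cols,PetscScalar *vals,PetscInt *len,PetscInt col,PetscScalar v)
     {
       PetscInt lo = 0,hi = *len,i,k;

       while (hi - lo > 5) {
         PetscInt t = (lo + hi)/2;
         if (cols[t] > col) hi = t;
         else               lo = t;
       }
       for (i=lo; i<hi; i++) {
         if (cols[i] == col) {vals[i] += v; return;}
         if (cols[i] >  col) break;
       }
       for (k=(*len)++; k>i; k--) {cols[k] = cols[k-1]; vals[k] = vals[k-1];}
       cols[i] = col;
       vals[i] = v;
     }
*/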

PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some variables required in the macros */
  Mat        A                 = aij->A;
  Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa               = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B                 = aij->B;
  Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba               = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) {
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                ierr = PetscInfo3(mat,"Skipping insertion of a new nonzero location in the off-diagonal portion of the matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
              } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-process parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
  Mat            A           = aij->A; /* diagonal part of the matrix */
  Mat            B           = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
  PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt       *ailen      = a->ilen,*aj = a->j;
  PetscInt       *bilen      = b->ilen,*bj = b->j;
  PetscInt       am          = aij->A->rmap->n,j;
  PetscInt       diag_so_far = 0,dnz;
  PetscInt       offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /*  Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}
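
/*
   Worked example for the split above (illustrative numbers only): with
   cstart = 4 and cend = 8, a row whose global columns are {1, 5, 9} is split
   into the diagonal block as local column {1} (from 5 - cstart) and into the
   off-diagonal block as global columns {1, 9}; the off-diagonal indices are
   compacted to local indices later, when the matrix is assembled.
*/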

/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-process parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
    Also, mat->was_assembled has to be PETSC_FALSE, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
  Mat            A      = aij->A; /* diagonal part of the matrix */
  Mat            B      = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ     *aijd  = (Mat_SeqAIJ*)(aij->A)->data,*aijo = (Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt       *ailen = a->ilen,*aj = a->j;
  PetscInt       *bilen = b->ilen,*bj = b->j;
  PetscInt       am     = aij->A->rmap->n,j;
  PetscInt       *full_diag_i = aijd->i,*full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
  PetscScalar    *aa = a->a,*ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /*  Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag+dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd+onz_row] = mat_j[col];
        ba[rowstart_offd+onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative rows are ignored */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative columns are ignored */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any process has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no process disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
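
/*
   Example (sketch): the standard assembly sequence that drives the two
   routines above.  Off-process entries are buffered in the stash by
   MatSetValues() and are routed to their owning processes during
   MatAssemblyBegin()/MatAssemblyEnd(); `row`, `col` and `v` are illustrative.

     ierr = MatSetValues(A,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/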

PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
  PetscInt      *lrows;
  PetscInt       r, len;
  PetscBool      cong;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }
  /* Must zero mat->B before mat->A because the (diag) case below may put values into mat->B */
  ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  if ((diag != 0.0) && cong) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
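
/*
   Example (sketch): applying a Dirichlet boundary condition through the public
   interface that reaches the routine above.  The listed global rows are
   zeroed, `diag` is placed on the diagonal, and b is adjusted from x so the
   prescribed solution values are preserved; the row list is illustrative.

     PetscInt    rows[] = {0, 7};
     PetscScalar diag   = 1.0;

     ierr = MatZeroRows(A,2,rows,diag,x,b);CHKERRQ(ierr);
*/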

PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off-diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off-diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of off-process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);

  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the VecScatterEnd() below */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
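
/*
   The forward multiply routines above overlap communication with computation:
   start the scatter of the needed off-process vector entries, apply the purely
   local (diagonal) block while the messages are in flight, complete the
   scatter, and only then apply the off-diagonal block.  Reduced to its
   skeleton (this is MatMult_MPIAIJ above); the transpose variants use the
   reverse scatter analogously:

     ierr = VecScatterBegin(Mvctx,xx,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
     ierr = MatMult(a->A,xx,yy);CHKERRQ(ierr);
     ierr = VecScatterEnd(Mvctx,xx,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
     ierr = MatMultAdd(a->B,lvec,yy,yy);CHKERRQ(ierr);
*/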

PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscBool      lf;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1237 
1238 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1239 {
1240   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1241   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1242   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1243   PetscErrorCode ierr;
1244   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1245   int            fd;
1246   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1247   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1248   PetscScalar    *column_values;
1249   PetscInt       message_count,flowcontrolcount;
1250   FILE           *file;
1251 
1252   PetscFunctionBegin;
1253   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1254   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1255   nz   = A->nz + B->nz;
1256   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1257   if (!rank) {
1258     header[0] = MAT_FILE_CLASSID;
1259     header[1] = mat->rmap->N;
1260     header[2] = mat->cmap->N;
1261 
1262     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1263     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1264     /* get largest number of rows any processor has */
1265     rlen  = mat->rmap->n;
1266     range = mat->rmap->range;
1267     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1268   } else {
1269     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1270     rlen = mat->rmap->n;
1271   }
1272 
1273   /* load up the local row counts */
1274   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1275   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1276 
1277   /* store the row lengths to the file */
1278   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1279   if (!rank) {
1280     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1281     for (i=1; i<size; i++) {
1282       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1283       rlen = range[i+1] - range[i];
1284       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1285       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1286     }
1287     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1288   } else {
1289     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1290     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1291     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1292   }
1293   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1294 
1295   /* load up the local column indices */
1296   nzmax = nz; /* process 0 needs enough space to hold the data of the process with the most nonzeros */
1297   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1298   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1299   cnt   = 0;
1300   for (i=0; i<mat->rmap->n; i++) {
1301     for (j=B->i[i]; j<B->i[i+1]; j++) {
1302       if ((col = garray[B->j[j]]) > cstart) break;
1303       column_indices[cnt++] = col;
1304     }
1305     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1306     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1307   }
1308   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1309 
1310   /* store the column indices to the file */
1311   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1312   if (!rank) {
1313     MPI_Status status;
1314     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1315     for (i=1; i<size; i++) {
1316       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1317       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1318       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1319       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1320       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1321     }
1322     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1323   } else {
1324     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1325     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1326     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1327     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1328   }
1329   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1330 
1331   /* load up the local column values */
1332   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1333   cnt  = 0;
1334   for (i=0; i<mat->rmap->n; i++) {
1335     for (j=B->i[i]; j<B->i[i+1]; j++) {
1336       if (garray[B->j[j]] > cstart) break;
1337       column_values[cnt++] = B->a[j];
1338     }
1339     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1340     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1341   }
1342   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1343 
1344   /* store the column values to the file */
1345   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1346   if (!rank) {
1347     MPI_Status status;
1348     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1349     for (i=1; i<size; i++) {
1350       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1351       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1352       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1353       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1354       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1355     }
1356     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1357   } else {
1358     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1359     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1360     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1361     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1362   }
1363   ierr = PetscFree(column_values);CHKERRQ(ierr);
1364 
1365   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1366   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1367   PetscFunctionReturn(0);
1368 }
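
/*
   Illustrative sketch (hypothetical helper, not part of the original source): the
   binary writer above is reached from user code by viewing an MPIAIJ matrix through
   a binary viewer.  The helper name and the file name "matrixfile" are arbitrary.
*/
static PetscErrorCode ExampleSaveMatrixBinary(Mat A)
{
  PetscErrorCode ierr;
  PetscViewer    viewer;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),"matrixfile",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
  ierr = MatView(A,viewer);CHKERRQ(ierr); /* dispatches to MatView_MPIAIJ_Binary when run on more than one process */
  ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}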
1369 
1370 #include <petscdraw.h>
1371 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1372 {
1373   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1374   PetscErrorCode    ierr;
1375   PetscMPIInt       rank = aij->rank,size = aij->size;
1376   PetscBool         isdraw,iascii,isbinary;
1377   PetscViewer       sviewer;
1378   PetscViewerFormat format;
1379 
1380   PetscFunctionBegin;
1381   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1382   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1383   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1384   if (iascii) {
1385     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1386     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1387       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1388       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1389       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1390       for (i=0; i<(PetscInt)size; i++) {
1391         nmax = PetscMax(nmax,nz[i]);
1392         nmin = PetscMin(nmin,nz[i]);
1393         navg += nz[i];
1394       }
1395       ierr = PetscFree(nz);CHKERRQ(ierr);
1396       navg = navg/size;
1397       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1398       PetscFunctionReturn(0);
1399     }
1400     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1401     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1402       MatInfo   info;
1403       PetscBool inodes;
1404 
1405       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1406       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1407       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1408       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1409       if (!inodes) {
1410         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1411                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1412       } else {
1413         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1414                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1415       }
1416       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1417       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1418       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1419       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1420       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1421       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1422       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1423       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1424       PetscFunctionReturn(0);
1425     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1426       PetscInt inodecount,inodelimit,*inodes;
1427       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1428       if (inodes) {
1429         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1430       } else {
1431         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1432       }
1433       PetscFunctionReturn(0);
1434     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1435       PetscFunctionReturn(0);
1436     }
1437   } else if (isbinary) {
1438     if (size == 1) {
1439       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1440       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1441     } else {
1442       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1443     }
1444     PetscFunctionReturn(0);
1445   } else if (isdraw) {
1446     PetscDraw draw;
1447     PetscBool isnull;
1448     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1449     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1450     if (isnull) PetscFunctionReturn(0);
1451   }
1452 
1453   {
1454     /* assemble the entire matrix onto the first process. */
1455     Mat        A;
1456     Mat_SeqAIJ *Aloc;
1457     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1458     MatScalar  *a;
1459 
1460     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1461     if (!rank) {
1462       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1463     } else {
1464       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1465     }
1466     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1467     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1468     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1469     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1470     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1471 
1472     /* copy over the A part */
1473     Aloc = (Mat_SeqAIJ*)aij->A->data;
1474     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1475     row  = mat->rmap->rstart;
1476     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1477     for (i=0; i<m; i++) {
1478       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1479       row++;
1480       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1481     }
1482     aj = Aloc->j;
1483     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1484 
1485     /* copy over the B part */
1486     Aloc = (Mat_SeqAIJ*)aij->B->data;
1487     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1488     row  = mat->rmap->rstart;
1489     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1490     ct   = cols;
1491     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1492     for (i=0; i<m; i++) {
1493       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1494       row++;
1495       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1496     }
1497     ierr = PetscFree(ct);CHKERRQ(ierr);
1498     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1499     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1500     /*
1501        Everyone has to call to draw the matrix since the graphics waits are
1502        synchronized across all processors that share the PetscDraw object
1503     */
1504     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1505     if (!rank) {
1506       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1507       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1508     }
1509     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1510     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1511     ierr = MatDestroy(&A);CHKERRQ(ierr);
1512   }
1513   PetscFunctionReturn(0);
1514 }
1515 
1516 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1517 {
1518   PetscErrorCode ierr;
1519   PetscBool      iascii,isdraw,issocket,isbinary;
1520 
1521   PetscFunctionBegin;
1522   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1523   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1524   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1525   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1526   if (iascii || isdraw || isbinary || issocket) {
1527     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1528   }
1529   PetscFunctionReturn(0);
1530 }
1531 
1532 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1533 {
1534   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1535   PetscErrorCode ierr;
1536   Vec            bb1 = 0;
1537   PetscBool      hasop;
1538 
1539   PetscFunctionBegin;
1540   if (flag == SOR_APPLY_UPPER) {
1541     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1542     PetscFunctionReturn(0);
1543   }
1544 
1545   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1546     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1547   }
1548 
1549   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1550     if (flag & SOR_ZERO_INITIAL_GUESS) {
1551       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1552       its--;
1553     }
1554 
1555     while (its--) {
1556       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1557       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1558 
1559       /* update rhs: bb1 = bb - B*x */
1560       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1561       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1562 
1563       /* local sweep */
1564       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1565     }
1566   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1567     if (flag & SOR_ZERO_INITIAL_GUESS) {
1568       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1569       its--;
1570     }
1571     while (its--) {
1572       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1573       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1574 
1575       /* update rhs: bb1 = bb - B*x */
1576       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1577       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1578 
1579       /* local sweep */
1580       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1581     }
1582   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1583     if (flag & SOR_ZERO_INITIAL_GUESS) {
1584       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1585       its--;
1586     }
1587     while (its--) {
1588       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1589       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1590 
1591       /* update rhs: bb1 = bb - B*x */
1592       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1593       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1594 
1595       /* local sweep */
1596       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1597     }
1598   } else if (flag & SOR_EISENSTAT) {
1599     Vec xx1;
1600 
1601     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1602     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1603 
1604     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1605     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1606     if (!mat->diag) {
1607       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1608       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1609     }
1610     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1611     if (hasop) {
1612       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1613     } else {
1614       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1615     }
1616     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1617 
1618     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1619 
1620     /* local sweep */
1621     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1622     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1623     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1624   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1625 
1626   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1627 
1628   matin->factorerrortype = mat->A->factorerrortype;
1629   PetscFunctionReturn(0);
1630 }
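
/*
   Illustrative sketch (hypothetical helper, not part of the original source): one
   local symmetric SOR sweep requested through the public MatSOR() interface, which
   dispatches to MatSOR_MPIAIJ above.  omega = 1.0 gives Gauss-Seidel sweeps.
*/
static PetscErrorCode ExampleLocalSORSweep(Mat A,Vec b,Vec x)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}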
1631 
1632 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1633 {
1634   Mat            aA,aB,Aperm;
1635   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1636   PetscScalar    *aa,*ba;
1637   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1638   PetscSF        rowsf,sf;
1639   IS             parcolp = NULL;
1640   PetscBool      done;
1641   PetscErrorCode ierr;
1642 
1643   PetscFunctionBegin;
1644   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1645   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1646   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1647   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1648 
1649   /* Invert row permutation to find out where my rows should go */
1650   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1651   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1652   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1653   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1654   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1655   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1656 
1657   /* Invert column permutation to find out where my columns should go */
1658   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1659   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1660   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1661   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1662   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1663   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1664   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1665 
1666   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1667   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1668   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1669 
1670   /* Find out where my gcols should go */
1671   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1672   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1673   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1674   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1675   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1676   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1677   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1678   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1679 
1680   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1681   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1682   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1683   for (i=0; i<m; i++) {
1684     PetscInt row = rdest[i],rowner;
1685     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1686     for (j=ai[i]; j<ai[i+1]; j++) {
1687       PetscInt cowner,col = cdest[aj[j]];
1688       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1689       if (rowner == cowner) dnnz[i]++;
1690       else onnz[i]++;
1691     }
1692     for (j=bi[i]; j<bi[i+1]; j++) {
1693       PetscInt cowner,col = gcdest[bj[j]];
1694       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1695       if (rowner == cowner) dnnz[i]++;
1696       else onnz[i]++;
1697     }
1698   }
1699   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1700   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1701   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1702   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1703   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1704 
1705   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1706   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1707   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1708   for (i=0; i<m; i++) {
1709     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1710     PetscInt j0,rowlen;
1711     rowlen = ai[i+1] - ai[i];
1712     for (j0=j=0; j<rowlen; j0=j) { /* rowlen can exceed m, the length of the repurposed scratch arrays, so insert the values in batches */
1713       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1714       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1715     }
1716     rowlen = bi[i+1] - bi[i];
1717     for (j0=j=0; j<rowlen; j0=j) {
1718       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1719       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1720     }
1721   }
1722   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1723   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1724   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1725   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1726   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1727   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1728   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1729   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1730   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1731   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1732   *B = Aperm;
1733   PetscFunctionReturn(0);
1734 }
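
/*
   Illustrative sketch (hypothetical helper, not part of the original source):
   calling MatPermute() with identity index sets built from the row and column
   ownership ranges; any conforming parallel permutation ISs can be substituted.
*/
static PetscErrorCode ExamplePermuteIdentity(Mat A,Mat *Bout)
{
  PetscErrorCode ierr;
  IS             rowp,colp;
  PetscInt       rstart,rend,cstart,cend;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetOwnershipRangeColumn(A,&cstart,&cend);CHKERRQ(ierr);
  ierr = ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&rowp);CHKERRQ(ierr);
  ierr = ISCreateStride(PetscObjectComm((PetscObject)A),cend-cstart,cstart,1,&colp);CHKERRQ(ierr);
  ierr = MatPermute(A,rowp,colp,Bout);CHKERRQ(ierr); /* dispatches to MatPermute_MPIAIJ above */
  ierr = ISDestroy(&rowp);CHKERRQ(ierr);
  ierr = ISDestroy(&colp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}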
1735 
1736 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1737 {
1738   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1739   PetscErrorCode ierr;
1740 
1741   PetscFunctionBegin;
1742   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1743   if (ghosts) *ghosts = aij->garray;
1744   PetscFunctionReturn(0);
1745 }
1746 
1747 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1748 {
1749   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1750   Mat            A    = mat->A,B = mat->B;
1751   PetscErrorCode ierr;
1752   PetscReal      isend[5],irecv[5];
1753 
1754   PetscFunctionBegin;
1755   info->block_size = 1.0;
1756   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1757 
1758   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1759   isend[3] = info->memory;  isend[4] = info->mallocs;
1760 
1761   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1762 
1763   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1764   isend[3] += info->memory;  isend[4] += info->mallocs;
1765   if (flag == MAT_LOCAL) {
1766     info->nz_used      = isend[0];
1767     info->nz_allocated = isend[1];
1768     info->nz_unneeded  = isend[2];
1769     info->memory       = isend[3];
1770     info->mallocs      = isend[4];
1771   } else if (flag == MAT_GLOBAL_MAX) {
1772     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1773 
1774     info->nz_used      = irecv[0];
1775     info->nz_allocated = irecv[1];
1776     info->nz_unneeded  = irecv[2];
1777     info->memory       = irecv[3];
1778     info->mallocs      = irecv[4];
1779   } else if (flag == MAT_GLOBAL_SUM) {
1780     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1781 
1782     info->nz_used      = irecv[0];
1783     info->nz_allocated = irecv[1];
1784     info->nz_unneeded  = irecv[2];
1785     info->memory       = irecv[3];
1786     info->mallocs      = irecv[4];
1787   }
1788   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1789   info->fill_ratio_needed = 0;
1790   info->factor_mallocs    = 0;
1791   PetscFunctionReturn(0);
1792 }
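
/*
   Illustrative sketch (hypothetical helper, not part of the original source):
   querying nonzero counts through the public MatGetInfo() interface.  The
   MAT_GLOBAL_SUM and MAT_GLOBAL_MAX flags trigger the reductions coded above.
*/
static PetscErrorCode ExampleReportNonzeros(Mat A)
{
  PetscErrorCode ierr;
  MatInfo        info;

  PetscFunctionBegin;
  ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
  ierr = PetscPrintf(PetscObjectComm((PetscObject)A),"nz used %g allocated %g\n",(double)info.nz_used,(double)info.nz_allocated);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}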
1793 
1794 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1795 {
1796   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1797   PetscErrorCode ierr;
1798 
1799   PetscFunctionBegin;
1800   switch (op) {
1801   case MAT_NEW_NONZERO_LOCATIONS:
1802   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1803   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1804   case MAT_KEEP_NONZERO_PATTERN:
1805   case MAT_NEW_NONZERO_LOCATION_ERR:
1806   case MAT_USE_INODES:
1807   case MAT_IGNORE_ZERO_ENTRIES:
1808     MatCheckPreallocated(A,1);
1809     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1810     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1811     break;
1812   case MAT_ROW_ORIENTED:
1813     MatCheckPreallocated(A,1);
1814     a->roworiented = flg;
1815 
1816     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1817     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1818     break;
1819   case MAT_NEW_DIAGONALS:
1820     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1821     break;
1822   case MAT_IGNORE_OFF_PROC_ENTRIES:
1823     a->donotstash = flg;
1824     break;
1825   case MAT_SPD:
1826     A->spd_set = PETSC_TRUE;
1827     A->spd     = flg;
1828     if (flg) {
1829       A->symmetric                  = PETSC_TRUE;
1830       A->structurally_symmetric     = PETSC_TRUE;
1831       A->symmetric_set              = PETSC_TRUE;
1832       A->structurally_symmetric_set = PETSC_TRUE;
1833     }
1834     break;
1835   case MAT_SYMMETRIC:
1836     MatCheckPreallocated(A,1);
1837     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1838     break;
1839   case MAT_STRUCTURALLY_SYMMETRIC:
1840     MatCheckPreallocated(A,1);
1841     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1842     break;
1843   case MAT_HERMITIAN:
1844     MatCheckPreallocated(A,1);
1845     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1846     break;
1847   case MAT_SYMMETRY_ETERNAL:
1848     MatCheckPreallocated(A,1);
1849     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1850     break;
1851   case MAT_SUBMAT_SINGLEIS:
1852     A->submat_singleis = flg;
1853     break;
1854   case MAT_STRUCTURE_ONLY:
1855     /* The option is handled directly by MatSetOption() */
1856     break;
1857   default:
1858     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1859   }
1860   PetscFunctionReturn(0);
1861 }
1862 
1863 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1864 {
1865   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1866   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1867   PetscErrorCode ierr;
1868   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1869   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1870   PetscInt       *cmap,*idx_p;
1871 
1872   PetscFunctionBegin;
1873   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1874   mat->getrowactive = PETSC_TRUE;
1875 
1876   if (!mat->rowvalues && (idx || v)) {
1877     /*
1878         allocate enough space to hold information from the longest row.
1879     */
1880     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1881     PetscInt   max = 1,tmp;
1882     for (i=0; i<matin->rmap->n; i++) {
1883       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1884       if (max < tmp) max = tmp;
1885     }
1886     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1887   }
1888 
1889   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1890   lrow = row - rstart;
1891 
1892   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1893   if (!v)   {pvA = 0; pvB = 0;}
1894   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1895   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1896   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1897   nztot = nzA + nzB;
1898 
1899   cmap = mat->garray;
1900   if (v  || idx) {
1901     if (nztot) {
1902       /* Sort by increasing column numbers, assuming A and B already sorted */
1903       PetscInt imark = -1;
1904       if (v) {
1905         *v = v_p = mat->rowvalues;
1906         for (i=0; i<nzB; i++) {
1907           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1908           else break;
1909         }
1910         imark = i;
1911         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1912         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1913       }
1914       if (idx) {
1915         *idx = idx_p = mat->rowindices;
1916         if (imark > -1) {
1917           for (i=0; i<imark; i++) {
1918             idx_p[i] = cmap[cworkB[i]];
1919           }
1920         } else {
1921           for (i=0; i<nzB; i++) {
1922             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1923             else break;
1924           }
1925           imark = i;
1926         }
1927         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1928         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1929       }
1930     } else {
1931       if (idx) *idx = 0;
1932       if (v)   *v   = 0;
1933     }
1934   }
1935   *nz  = nztot;
1936   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1937   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1938   PetscFunctionReturn(0);
1939 }
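
/*
   Illustrative sketch (hypothetical helper, not part of the original source): the
   usual access pattern for the routine above.  MatGetRow() serves only rows owned
   by this process, and each call must be paired with MatRestoreRow().
*/
static PetscErrorCode ExampleScanLocalRows(Mat A)
{
  PetscErrorCode    ierr;
  PetscInt          row,rstart,rend,ncols;
  const PetscInt    *cols;
  const PetscScalar *vals;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
    /* ... examine cols[0..ncols-1] (global column indices) and vals[0..ncols-1] ... */
    ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}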
1940 
1941 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1942 {
1943   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1944 
1945   PetscFunctionBegin;
1946   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1947   aij->getrowactive = PETSC_FALSE;
1948   PetscFunctionReturn(0);
1949 }
1950 
1951 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1952 {
1953   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1954   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1955   PetscErrorCode ierr;
1956   PetscInt       i,j,cstart = mat->cmap->rstart;
1957   PetscReal      sum = 0.0;
1958   MatScalar      *v;
1959 
1960   PetscFunctionBegin;
1961   if (aij->size == 1) {
1962     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1963   } else {
1964     if (type == NORM_FROBENIUS) {
1965       v = amat->a;
1966       for (i=0; i<amat->nz; i++) {
1967         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1968       }
1969       v = bmat->a;
1970       for (i=0; i<bmat->nz; i++) {
1971         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1972       }
1973       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1974       *norm = PetscSqrtReal(*norm);
1975       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1976     } else if (type == NORM_1) { /* max column norm */
1977       PetscReal *tmp,*tmp2;
1978       PetscInt  *jj,*garray = aij->garray;
1979       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1980       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1981       *norm = 0.0;
1982       v     = amat->a; jj = amat->j;
1983       for (j=0; j<amat->nz; j++) {
1984         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1985       }
1986       v = bmat->a; jj = bmat->j;
1987       for (j=0; j<bmat->nz; j++) {
1988         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1989       }
1990       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1991       for (j=0; j<mat->cmap->N; j++) {
1992         if (tmp2[j] > *norm) *norm = tmp2[j];
1993       }
1994       ierr = PetscFree(tmp);CHKERRQ(ierr);
1995       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1996       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1997     } else if (type == NORM_INFINITY) { /* max row norm */
1998       PetscReal ntemp = 0.0;
1999       for (j=0; j<aij->A->rmap->n; j++) {
2000         v   = amat->a + amat->i[j];
2001         sum = 0.0;
2002         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2003           sum += PetscAbsScalar(*v); v++;
2004         }
2005         v = bmat->a + bmat->i[j];
2006         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2007           sum += PetscAbsScalar(*v); v++;
2008         }
2009         if (sum > ntemp) ntemp = sum;
2010       }
2011       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2012       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
2013     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
2014   }
2015   PetscFunctionReturn(0);
2016 }
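
/*
   Illustrative sketch (hypothetical helper, not part of the original source): the
   norms computed above, requested through the public interface.  NORM_2 is
   rejected by MatNorm_MPIAIJ on more than one process, so it is not used here.
*/
static PetscErrorCode ExampleMatrixNorms(Mat A,PetscReal *fro,PetscReal *one,PetscReal *inf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatNorm(A,NORM_FROBENIUS,fro);CHKERRQ(ierr); /* sqrt of the global sum of |a_ij|^2 */
  ierr = MatNorm(A,NORM_1,one);CHKERRQ(ierr);         /* maximum column sum */
  ierr = MatNorm(A,NORM_INFINITY,inf);CHKERRQ(ierr);  /* maximum row sum */
  PetscFunctionReturn(0);
}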
2017 
2018 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2019 {
2020   Mat_MPIAIJ     *a    =(Mat_MPIAIJ*)A->data,*b;
2021   Mat_SeqAIJ     *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
2022   PetscInt       M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,*B_diag_ilen,*B_diag_i,i,ncol,A_diag_ncol;
2023   PetscErrorCode ierr;
2024   Mat            B,A_diag,*B_diag;
2025   MatScalar      *array;
2026 
2027   PetscFunctionBegin;
2028   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2029   ai = Aloc->i; aj = Aloc->j;
2030   bi = Bloc->i; bj = Bloc->j;
2031   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2032     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2033     PetscSFNode          *oloc;
2034     PETSC_UNUSED PetscSF sf;
2035 
2036     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2037     /* compute d_nnz for preallocation */
2038     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2039     for (i=0; i<ai[ma]; i++) {
2040       d_nnz[aj[i]]++;
2041     }
2042     /* compute local off-diagonal contributions */
2043     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2044     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2045     /* map those to global */
2046     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2047     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2048     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2049     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2050     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2051     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2052     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2053 
2054     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2055     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2056     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2057     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2058     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2059     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2060   } else {
2061     B    = *matout;
2062     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2063   }
2064 
2065   b           = (Mat_MPIAIJ*)B->data;
2066   A_diag      = a->A;
2067   B_diag      = &b->A;
2068   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2069   A_diag_ncol = A_diag->cmap->N;
2070   B_diag_ilen = sub_B_diag->ilen;
2071   B_diag_i    = sub_B_diag->i;
2072 
2073   /* Set ilen for diagonal of B */
2074   for (i=0; i<A_diag_ncol; i++) {
2075     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2076   }
2077 
2078   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
2079   very quickly (that is, without using MatSetValues), because all writes are local. */
2080   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2081 
2082   /* copy over the B part */
2083   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2084   array = Bloc->a;
2085   row   = A->rmap->rstart;
2086   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2087   cols_tmp = cols;
2088   for (i=0; i<mb; i++) {
2089     ncol = bi[i+1]-bi[i];
2090     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2091     row++;
2092     array += ncol; cols_tmp += ncol;
2093   }
2094   ierr = PetscFree(cols);CHKERRQ(ierr);
2095 
2096   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2097   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2098   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2099     *matout = B;
2100   } else {
2101     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2102   }
2103   PetscFunctionReturn(0);
2104 }
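
/*
   Illustrative sketch (hypothetical helper, not part of the original source): the
   three reuse modes accepted by the routine above, via the public MatTranspose()
   interface.  MAT_REUSE_MATRIX assumes the nonzero pattern of A is unchanged.
*/
static PetscErrorCode ExampleTranspose(Mat A)
{
  PetscErrorCode ierr;
  Mat            At;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); /* allocate and fill At = A^T */
  ierr = MatTranspose(A,MAT_REUSE_MATRIX,&At);CHKERRQ(ierr);   /* refill At after the values of A changed */
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatTranspose(A,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);  /* replace A by its transpose */
  PetscFunctionReturn(0);
}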
2105 
2106 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2107 {
2108   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2109   Mat            a    = aij->A,b = aij->B;
2110   PetscErrorCode ierr;
2111   PetscInt       s1,s2,s3;
2112 
2113   PetscFunctionBegin;
2114   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2115   if (rr) {
2116     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2117     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2118     /* Overlap communication with computation. */
2119     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2120   }
2121   if (ll) {
2122     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2123     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2124     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2125   }
2126   /* scale  the diagonal block */
2127   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2128 
2129   if (rr) {
2130     /* Do a scatter end and then right scale the off-diagonal block */
2131     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2132     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2133   }
2134   PetscFunctionReturn(0);
2135 }
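
/*
   Illustrative sketch (hypothetical helper, not part of the original source):
   A <- diag(l) * A * diag(r) through the public MatDiagonalScale() interface;
   l must match the row layout of A and r the column layout.
*/
static PetscErrorCode ExampleDiagonalScale(Mat A)
{
  PetscErrorCode ierr;
  Vec            l,r;

  PetscFunctionBegin;
  ierr = MatCreateVecs(A,&r,&l);CHKERRQ(ierr); /* r: column layout, l: row layout */
  ierr = VecSet(l,2.0);CHKERRQ(ierr);
  ierr = VecSet(r,0.5);CHKERRQ(ierr);
  ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr); /* dispatches to MatDiagonalScale_MPIAIJ above */
  ierr = VecDestroy(&l);CHKERRQ(ierr);
  ierr = VecDestroy(&r);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}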
2136 
2137 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2138 {
2139   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2140   PetscErrorCode ierr;
2141 
2142   PetscFunctionBegin;
2143   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2144   PetscFunctionReturn(0);
2145 }
2146 
2147 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2148 {
2149   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2150   Mat            a,b,c,d;
2151   PetscBool      flg;
2152   PetscErrorCode ierr;
2153 
2154   PetscFunctionBegin;
2155   a = matA->A; b = matA->B;
2156   c = matB->A; d = matB->B;
2157 
2158   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2159   if (flg) {
2160     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2161   }
2162   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2163   PetscFunctionReturn(0);
2164 }
2165 
2166 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2167 {
2168   PetscErrorCode ierr;
2169   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2170   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2171 
2172   PetscFunctionBegin;
2173   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2174   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2175     /* because of the column compression in the off-processor part of the matrix a->B,
2176        the number of columns in a->B and b->B may be different, hence we cannot call
2177        the MatCopy() directly on the two parts. If need be, we can provide a more
2178        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2179        then copying the submatrices */
2180     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2181   } else {
2182     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2183     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2184   }
2185   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2186   PetscFunctionReturn(0);
2187 }
2188 
2189 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2190 {
2191   PetscErrorCode ierr;
2192 
2193   PetscFunctionBegin;
2194   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2195   PetscFunctionReturn(0);
2196 }
2197 
2198 /*
2199    Computes the number of nonzeros per row needed for preallocation when X and Y
2200    have different nonzero structure.
2201 */
2202 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2203 {
2204   PetscInt       i,j,k,nzx,nzy;
2205 
2206   PetscFunctionBegin;
2207   /* Set the number of nonzeros in the new matrix */
2208   for (i=0; i<m; i++) {
2209     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2210     nzx = xi[i+1] - xi[i];
2211     nzy = yi[i+1] - yi[i];
2212     nnz[i] = 0;
2213     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2214       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2215       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2216       nnz[i]++;
2217     }
2218     for (; k<nzy; k++) nnz[i]++;
2219   }
2220   PetscFunctionReturn(0);
2221 }
2222 
2223 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2224 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2225 {
2226   PetscErrorCode ierr;
2227   PetscInt       m = Y->rmap->N;
2228   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2229   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2230 
2231   PetscFunctionBegin;
2232   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2233   PetscFunctionReturn(0);
2234 }
2235 
2236 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2237 {
2238   PetscErrorCode ierr;
2239   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2240   PetscBLASInt   bnz,one=1;
2241   Mat_SeqAIJ     *x,*y;
2242 
2243   PetscFunctionBegin;
2244   if (str == SAME_NONZERO_PATTERN) {
2245     PetscScalar alpha = a;
2246     x    = (Mat_SeqAIJ*)xx->A->data;
2247     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2248     y    = (Mat_SeqAIJ*)yy->A->data;
2249     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2250     x    = (Mat_SeqAIJ*)xx->B->data;
2251     y    = (Mat_SeqAIJ*)yy->B->data;
2252     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2253     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2254     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2255   } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzeros of X are a subset of Y's */
2256     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2257   } else {
2258     Mat      B;
2259     PetscInt *nnz_d,*nnz_o;
2260     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2261     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2262     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2263     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2264     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2265     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2266     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2267     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2268     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2269     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2270     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2271     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2272     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2273     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2274   }
2275   PetscFunctionReturn(0);
2276 }
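
/*
   Illustrative sketch (hypothetical helper, not part of the original source):
   Y <- a*X + Y through the public MatAXPY() interface.  The MatStructure
   argument selects one of the three branches above; DIFFERENT_NONZERO_PATTERN
   is always safe but the slowest, since it rebuilds the preallocation of Y.
*/
static PetscErrorCode ExampleAXPY(Mat Y,Mat X,PetscScalar a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatAXPY(Y,a,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}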
2277 
2278 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2279 
2280 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2281 {
2282 #if defined(PETSC_USE_COMPLEX)
2283   PetscErrorCode ierr;
2284   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2285 
2286   PetscFunctionBegin;
2287   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2288   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2289 #else
2290   PetscFunctionBegin;
2291 #endif
2292   PetscFunctionReturn(0);
2293 }
2294 
2295 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2296 {
2297   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2298   PetscErrorCode ierr;
2299 
2300   PetscFunctionBegin;
2301   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2302   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2303   PetscFunctionReturn(0);
2304 }
2305 
2306 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2307 {
2308   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2309   PetscErrorCode ierr;
2310 
2311   PetscFunctionBegin;
2312   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2313   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2314   PetscFunctionReturn(0);
2315 }
2316 
2317 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2318 {
2319   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2320   PetscErrorCode ierr;
2321   PetscInt       i,*idxb = 0;
2322   PetscScalar    *va,*vb;
2323   Vec            vtmp;
2324 
2325   PetscFunctionBegin;
2326   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2327   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2328   if (idx) {
2329     for (i=0; i<A->rmap->n; i++) {
2330       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2331     }
2332   }
2333 
2334   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2335   if (idx) {
2336     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2337   }
2338   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2339   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2340 
2341   for (i=0; i<A->rmap->n; i++) {
2342     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2343       va[i] = vb[i];
2344       if (idx) idx[i] = a->garray[idxb[i]];
2345     }
2346   }
2347 
2348   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2349   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2350   ierr = PetscFree(idxb);CHKERRQ(ierr);
2351   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2352   PetscFunctionReturn(0);
2353 }
2354 
2355 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2356 {
2357   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2358   PetscErrorCode ierr;
2359   PetscInt       i,*idxb = 0;
2360   PetscScalar    *va,*vb;
2361   Vec            vtmp;
2362 
2363   PetscFunctionBegin;
2364   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2365   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2366   if (idx) {
2367     for (i=0; i<A->rmap->n; i++) {
2368       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2369     }
2370   }
2371 
2372   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2373   if (idx) {
2374     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2375   }
2376   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2377   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2378 
2379   for (i=0; i<A->rmap->n; i++) {
2380     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2381       va[i] = vb[i];
2382       if (idx) idx[i] = a->garray[idxb[i]];
2383     }
2384   }
2385 
2386   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2387   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2388   ierr = PetscFree(idxb);CHKERRQ(ierr);
2389   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2390   PetscFunctionReturn(0);
2391 }
2392 
2393 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2394 {
2395   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2396   PetscInt       n      = A->rmap->n;
2397   PetscInt       cstart = A->cmap->rstart;
2398   PetscInt       *cmap  = mat->garray;
2399   PetscInt       *diagIdx, *offdiagIdx;
2400   Vec            diagV, offdiagV;
2401   PetscScalar    *a, *diagA, *offdiagA;
2402   PetscInt       r;
2403   PetscErrorCode ierr;
2404 
2405   PetscFunctionBegin;
2406   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2407   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2408   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2409   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2410   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2411   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2412   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2413   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2414   for (r = 0; r < n; ++r) {
2415     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2416       a[r]   = diagA[r];
2417       idx[r] = cstart + diagIdx[r];
2418     } else {
2419       a[r]   = offdiagA[r];
2420       idx[r] = cmap[offdiagIdx[r]];
2421     }
2422   }
2423   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2424   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2425   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2426   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2427   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2428   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2429   PetscFunctionReturn(0);
2430 }
2431 
2432 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2433 {
2434   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2435   PetscInt       n      = A->rmap->n;
2436   PetscInt       cstart = A->cmap->rstart;
2437   PetscInt       *cmap  = mat->garray;
2438   PetscInt       *diagIdx, *offdiagIdx;
2439   Vec            diagV, offdiagV;
2440   PetscScalar    *a, *diagA, *offdiagA;
2441   PetscInt       r;
2442   PetscErrorCode ierr;
2443 
2444   PetscFunctionBegin;
2445   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2446   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2447   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2448   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2449   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2450   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2451   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2452   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2453   for (r = 0; r < n; ++r) {
2454     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2455       a[r]   = diagA[r];
2456       idx[r] = cstart + diagIdx[r];
2457     } else {
2458       a[r]   = offdiagA[r];
2459       idx[r] = cmap[offdiagIdx[r]];
2460     }
2461   }
2462   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2463   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2464   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2465   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2466   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2467   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2468   PetscFunctionReturn(0);
2469 }
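
/*
   Illustrative sketch (hypothetical helper, not part of the original source):
   calling the row-max routine above; v must have the row layout of A, and idx
   receives the global column index of the chosen entry in each local row.
*/
static PetscErrorCode ExampleRowMax(Mat A,Vec *v,PetscInt **idx)
{
  PetscErrorCode ierr;
  PetscInt       n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr);
  ierr = MatCreateVecs(A,NULL,v);CHKERRQ(ierr); /* left vector: same layout as the rows */
  ierr = PetscMalloc1(n,idx);CHKERRQ(ierr);
  ierr = MatGetRowMax(A,*v,*idx);CHKERRQ(ierr); /* dispatches to MatGetRowMax_MPIAIJ above */
  PetscFunctionReturn(0);
}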
2470 
2471 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2472 {
2473   PetscErrorCode ierr;
2474   Mat            *dummy;
2475 
2476   PetscFunctionBegin;
2477   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2478   *newmat = *dummy;
2479   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2480   PetscFunctionReturn(0);
2481 }
2482 
2483 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2484 {
2485   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2486   PetscErrorCode ierr;
2487 
2488   PetscFunctionBegin;
2489   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2490   A->factorerrortype = a->A->factorerrortype;
2491   PetscFunctionReturn(0);
2492 }
2493 
2494 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2495 {
2496   PetscErrorCode ierr;
2497   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2498 
2499   PetscFunctionBegin;
2500   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2501   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2502   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2503   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2504   PetscFunctionReturn(0);
2505 }
2506 
2507 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2508 {
2509   PetscFunctionBegin;
2510   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2511   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2512   PetscFunctionReturn(0);
2513 }
2514 
2515 /*@
2516    MatMPIAIJSetUseScalableIncreaseOverlap - Specify whether the matrix uses a scalable algorithm to compute the overlap
2517 
2518    Collective on Mat
2519 
2520    Input Parameters:
2521 +    A - the matrix
2522 -    sc - PETSC_TRUE indicates the scalable algorithm should be used (the default is not to use it)
2523 
2524  Level: advanced
2525 
2526 @*/
2527 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2528 {
2529   PetscErrorCode       ierr;
2530 
2531   PetscFunctionBegin;
2532   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2533   PetscFunctionReturn(0);
2534 }
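
/*
   Illustrative sketch (hypothetical helper, not part of the original source):
   selecting the scalable overlap algorithm, then growing an index set by two
   levels of overlap.  The same choice is available at run time through the
   -mat_increase_overlap_scalable option handled in MatSetFromOptions_MPIAIJ below.
*/
static PetscErrorCode ExampleScalableOverlap(Mat A,IS *is)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatIncreaseOverlap(A,1,is,2);CHKERRQ(ierr); /* now dispatches to MatIncreaseOverlap_MPIAIJ_Scalable */
  PetscFunctionReturn(0);
}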
2535 
2536 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2537 {
2538   PetscErrorCode       ierr;
2539   PetscBool            sc = PETSC_FALSE,flg;
2540 
2541   PetscFunctionBegin;
2542   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2543   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2544   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2545   if (flg) {
2546     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2547   }
2548   ierr = PetscOptionsTail();CHKERRQ(ierr);
2549   PetscFunctionReturn(0);
2550 }
2551 
2552 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2553 {
2554   PetscErrorCode ierr;
2555   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2556   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2557 
2558   PetscFunctionBegin;
2559   if (!Y->preallocated) {
2560     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2561   } else if (!aij->nz) {
2562     PetscInt nonew = aij->nonew;
2563     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2564     aij->nonew = nonew;
2565   }
2566   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2567   PetscFunctionReturn(0);
2568 }
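
/*
   A minimal usage sketch: MatShift() computes Y = Y + a*I and dispatches to
   the routine above for a MATMPIAIJ matrix Y (Y must be square):

     ierr = MatShift(Y,a);CHKERRQ(ierr);

   As the code above shows, an unpreallocated or empty Y is first given a
   one-nonzero-per-row preallocation so that the diagonal insertions are cheap.
*/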
2569 
2570 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2571 {
2572   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2573   PetscErrorCode ierr;
2574 
2575   PetscFunctionBegin;
2576   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2577   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2578   if (d) {
2579     PetscInt rstart;
2580     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2581     *d += rstart;
2582 
2583   }
2584   PetscFunctionReturn(0);
2585 }
2586 
2587 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2588 {
2589   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2590   PetscErrorCode ierr;
2591 
2592   PetscFunctionBegin;
2593   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2594   PetscFunctionReturn(0);
2595 }
2596 
2597 /* -------------------------------------------------------------------*/
2598 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2599                                        MatGetRow_MPIAIJ,
2600                                        MatRestoreRow_MPIAIJ,
2601                                        MatMult_MPIAIJ,
2602                                 /* 4*/ MatMultAdd_MPIAIJ,
2603                                        MatMultTranspose_MPIAIJ,
2604                                        MatMultTransposeAdd_MPIAIJ,
2605                                        0,
2606                                        0,
2607                                        0,
2608                                 /*10*/ 0,
2609                                        0,
2610                                        0,
2611                                        MatSOR_MPIAIJ,
2612                                        MatTranspose_MPIAIJ,
2613                                 /*15*/ MatGetInfo_MPIAIJ,
2614                                        MatEqual_MPIAIJ,
2615                                        MatGetDiagonal_MPIAIJ,
2616                                        MatDiagonalScale_MPIAIJ,
2617                                        MatNorm_MPIAIJ,
2618                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2619                                        MatAssemblyEnd_MPIAIJ,
2620                                        MatSetOption_MPIAIJ,
2621                                        MatZeroEntries_MPIAIJ,
2622                                 /*24*/ MatZeroRows_MPIAIJ,
2623                                        0,
2624                                        0,
2625                                        0,
2626                                        0,
2627                                 /*29*/ MatSetUp_MPIAIJ,
2628                                        0,
2629                                        0,
2630                                        MatGetDiagonalBlock_MPIAIJ,
2631                                        0,
2632                                 /*34*/ MatDuplicate_MPIAIJ,
2633                                        0,
2634                                        0,
2635                                        0,
2636                                        0,
2637                                 /*39*/ MatAXPY_MPIAIJ,
2638                                        MatCreateSubMatrices_MPIAIJ,
2639                                        MatIncreaseOverlap_MPIAIJ,
2640                                        MatGetValues_MPIAIJ,
2641                                        MatCopy_MPIAIJ,
2642                                 /*44*/ MatGetRowMax_MPIAIJ,
2643                                        MatScale_MPIAIJ,
2644                                        MatShift_MPIAIJ,
2645                                        MatDiagonalSet_MPIAIJ,
2646                                        MatZeroRowsColumns_MPIAIJ,
2647                                 /*49*/ MatSetRandom_MPIAIJ,
2648                                        0,
2649                                        0,
2650                                        0,
2651                                        0,
2652                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2653                                        0,
2654                                        MatSetUnfactored_MPIAIJ,
2655                                        MatPermute_MPIAIJ,
2656                                        0,
2657                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2658                                        MatDestroy_MPIAIJ,
2659                                        MatView_MPIAIJ,
2660                                        0,
2661                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2662                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2663                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2664                                        0,
2665                                        0,
2666                                        0,
2667                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2668                                        MatGetRowMinAbs_MPIAIJ,
2669                                        0,
2670                                        0,
2671                                        0,
2672                                        0,
2673                                 /*75*/ MatFDColoringApply_AIJ,
2674                                        MatSetFromOptions_MPIAIJ,
2675                                        0,
2676                                        0,
2677                                        MatFindZeroDiagonals_MPIAIJ,
2678                                 /*80*/ 0,
2679                                        0,
2680                                        0,
2681                                 /*83*/ MatLoad_MPIAIJ,
2682                                        MatIsSymmetric_MPIAIJ,
2683                                        0,
2684                                        0,
2685                                        0,
2686                                        0,
2687                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2688                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2689                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2690                                        MatPtAP_MPIAIJ_MPIAIJ,
2691                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2692                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2693                                        0,
2694                                        0,
2695                                        0,
2696                                        0,
2697                                 /*99*/ 0,
2698                                        0,
2699                                        0,
2700                                        MatConjugate_MPIAIJ,
2701                                        0,
2702                                 /*104*/MatSetValuesRow_MPIAIJ,
2703                                        MatRealPart_MPIAIJ,
2704                                        MatImaginaryPart_MPIAIJ,
2705                                        0,
2706                                        0,
2707                                 /*109*/0,
2708                                        0,
2709                                        MatGetRowMin_MPIAIJ,
2710                                        0,
2711                                        MatMissingDiagonal_MPIAIJ,
2712                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2713                                        0,
2714                                        MatGetGhosts_MPIAIJ,
2715                                        0,
2716                                        0,
2717                                 /*119*/0,
2718                                        0,
2719                                        0,
2720                                        0,
2721                                        MatGetMultiProcBlock_MPIAIJ,
2722                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2723                                        MatGetColumnNorms_MPIAIJ,
2724                                        MatInvertBlockDiagonal_MPIAIJ,
2725                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2726                                        MatCreateSubMatricesMPI_MPIAIJ,
2727                                 /*129*/0,
2728                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2729                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2730                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2731                                        0,
2732                                 /*134*/0,
2733                                        0,
2734                                        MatRARt_MPIAIJ_MPIAIJ,
2735                                        0,
2736                                        0,
2737                                 /*139*/MatSetBlockSizes_MPIAIJ,
2738                                        0,
2739                                        0,
2740                                        MatFDColoringSetUp_MPIXAIJ,
2741                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2742                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2743 };
2744 
2745 /* ----------------------------------------------------------------------------------------*/
2746 
2747 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2748 {
2749   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2750   PetscErrorCode ierr;
2751 
2752   PetscFunctionBegin;
2753   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2754   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2755   PetscFunctionReturn(0);
2756 }
2757 
2758 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2759 {
2760   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2761   PetscErrorCode ierr;
2762 
2763   PetscFunctionBegin;
2764   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2765   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2766   PetscFunctionReturn(0);
2767 }
2768 
2769 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2770 {
2771   Mat_MPIAIJ     *b;
2772   PetscErrorCode ierr;
2773 
2774   PetscFunctionBegin;
2775   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2776   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2777   b = (Mat_MPIAIJ*)B->data;
2778 
2779 #if defined(PETSC_USE_CTABLE)
2780   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2781 #else
2782   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2783 #endif
2784   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2785   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2786   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2787 
2788   /* Because B will have been resized we simply destroy it and create a new one each time */
2789   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2790   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2791   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2792   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2793   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2794   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2795 
2796   if (!B->preallocated) {
2797     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2798     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2799     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2800     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2801     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2802   }
2803 
2804   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2805   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2806   B->preallocated  = PETSC_TRUE;
2807   B->was_assembled = PETSC_FALSE;
2808   B->assembled     = PETSC_FALSE;
2809   PetscFunctionReturn(0);
2810 }
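
/*
   A minimal preallocation sketch (hypothetical sizes, for illustration only):
   every process owns 3 rows of a tridiagonal matrix, so each row has at most
   3 nonzeros in the diagonal block and at most 1 in the off-diagonal block.

     Mat B;
     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,3,3,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(B,3,NULL,1,NULL);CHKERRQ(ierr);
*/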
2811 
2812 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2813 {
2814   Mat_MPIAIJ     *b;
2815   PetscErrorCode ierr;
2816 
2817   PetscFunctionBegin;
2818   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2819   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2820   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2821   b = (Mat_MPIAIJ*)B->data;
2822 
2823 #if defined(PETSC_USE_CTABLE)
2824   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2825 #else
2826   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2827 #endif
2828   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2829   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2830   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2831 
2832   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2833   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2834   B->preallocated  = PETSC_TRUE;
2835   B->was_assembled = PETSC_FALSE;
2836   B->assembled = PETSC_FALSE;
2837   PetscFunctionReturn(0);
2838 }
2839 
2840 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2841 {
2842   Mat            mat;
2843   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2844   PetscErrorCode ierr;
2845 
2846   PetscFunctionBegin;
2847   *newmat = 0;
2848   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2849   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2850   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2851   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2852   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2853   a       = (Mat_MPIAIJ*)mat->data;
2854 
2855   mat->factortype   = matin->factortype;
2856   mat->assembled    = PETSC_TRUE;
2857   mat->insertmode   = NOT_SET_VALUES;
2858   mat->preallocated = PETSC_TRUE;
2859 
2860   a->size         = oldmat->size;
2861   a->rank         = oldmat->rank;
2862   a->donotstash   = oldmat->donotstash;
2863   a->roworiented  = oldmat->roworiented;
2864   a->rowindices   = 0;
2865   a->rowvalues    = 0;
2866   a->getrowactive = PETSC_FALSE;
2867 
2868   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2869   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2870 
2871   if (oldmat->colmap) {
2872 #if defined(PETSC_USE_CTABLE)
2873     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2874 #else
2875     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2876     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2877     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2878 #endif
2879   } else a->colmap = 0;
2880   if (oldmat->garray) {
2881     PetscInt len;
2882     len  = oldmat->B->cmap->n;
2883     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2884     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2885     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2886   } else a->garray = 0;
2887 
2888   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2889   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2890   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2891   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2892 
2893   if (oldmat->Mvctx_mpi1) {
2894     ierr    = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2895     ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2896   }
2897 
2898   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2899   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2900   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2901   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2902   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2903   *newmat = mat;
2904   PetscFunctionReturn(0);
2905 }
2906 
2907 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2908 {
2909   PetscBool      isbinary, ishdf5;
2910   PetscErrorCode ierr;
2911 
2912   PetscFunctionBegin;
2913   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2914   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2915   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2916   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2917   if (isbinary) {
2918     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
2919   } else if (ishdf5) {
2920 #if defined(PETSC_HAVE_HDF5)
2921     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
2922 #else
2923     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2924 #endif
2925   } else {
2926     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2927   }
2928   PetscFunctionReturn(0);
2929 }
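
/*
   A minimal loading sketch (assumes "matrix.dat" was written earlier with
   MatView() and a binary viewer):

     Mat         A;
     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/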
2930 
2931 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer)
2932 {
2933   PetscScalar    *vals,*svals;
2934   MPI_Comm       comm;
2935   PetscErrorCode ierr;
2936   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2937   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2938   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2939   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2940   PetscInt       cend,cstart,n,*rowners;
2941   int            fd;
2942   PetscInt       bs = newMat->rmap->bs;
2943 
2944   PetscFunctionBegin;
2945   /* force binary viewer to load .info file if it has not yet done so */
2946   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2947   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2948   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2949   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2950   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2951   if (!rank) {
2952     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2953     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
2954     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2955   }
2956 
2957   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2958   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2959   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2960   if (bs < 0) bs = 1;
2961 
2962   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2963   M    = header[1]; N = header[2];
2964 
2965   /* If global sizes are set, check if they are consistent with that given in the file */
2966   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2967   if (newMat->cmap->N >= 0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2968 
2969   /* determine ownership of all (block) rows */
2970   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows (%D) and block size (%D)",M,bs);
2971   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2972   else m = newMat->rmap->n; /* Set by user */
2973 
2974   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2975   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2976 
2977   /* First process needs enough room for process with most rows */
2978   if (!rank) {
2979     mmax = rowners[1];
2980     for (i=2; i<=size; i++) {
2981       mmax = PetscMax(mmax, rowners[i]);
2982     }
2983   } else mmax = -1;             /* unused, but compilers complain */
2984 
2985   rowners[0] = 0;
2986   for (i=2; i<=size; i++) {
2987     rowners[i] += rowners[i-1];
2988   }
2989   rstart = rowners[rank];
2990   rend   = rowners[rank+1];
2991 
2992   /* distribute row lengths to all processors */
2993   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2994   if (!rank) {
2995     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2996     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2997     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2998     for (j=0; j<m; j++) {
2999       procsnz[0] += ourlens[j];
3000     }
3001     for (i=1; i<size; i++) {
3002       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3003       /* calculate the number of nonzeros on each processor */
3004       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3005         procsnz[i] += rowlengths[j];
3006       }
3007       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3008     }
3009     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3010   } else {
3011     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3012   }
3013 
3014   if (!rank) {
3015     /* determine max buffer needed and allocate it */
3016     maxnz = 0;
3017     for (i=0; i<size; i++) {
3018       maxnz = PetscMax(maxnz,procsnz[i]);
3019     }
3020     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3021 
3022     /* read in my part of the matrix column indices  */
3023     nz   = procsnz[0];
3024     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3025     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3026 
3027     /* read in everyone else's parts and ship them off */
3028     for (i=1; i<size; i++) {
3029       nz   = procsnz[i];
3030       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3031       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3032     }
3033     ierr = PetscFree(cols);CHKERRQ(ierr);
3034   } else {
3035     /* determine buffer space needed for message */
3036     nz = 0;
3037     for (i=0; i<m; i++) {
3038       nz += ourlens[i];
3039     }
3040     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3041 
3042     /* receive message of column indices */
3043     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3044   }
3045 
3046   /* determine column ownership if matrix is not square */
3047   if (N != M) {
3048     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3049     else n = newMat->cmap->n;
3050     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3051     cstart = cend - n;
3052   } else {
3053     cstart = rstart;
3054     cend   = rend;
3055     n      = cend - cstart;
3056   }
3057 
3058   /* loop over local rows, determining number of off-diagonal entries */
3059   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3060   jj   = 0;
3061   for (i=0; i<m; i++) {
3062     for (j=0; j<ourlens[i]; j++) {
3063       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3064       jj++;
3065     }
3066   }
3067 
3068   for (i=0; i<m; i++) {
3069     ourlens[i] -= offlens[i];
3070   }
3071   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3072 
3073   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3074 
3075   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3076 
3077   for (i=0; i<m; i++) {
3078     ourlens[i] += offlens[i];
3079   }
3080 
3081   if (!rank) {
3082     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3083 
3084     /* read in my part of the matrix numerical values  */
3085     nz   = procsnz[0];
3086     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3087 
3088     /* insert into matrix */
3089     jj      = rstart;
3090     smycols = mycols;
3091     svals   = vals;
3092     for (i=0; i<m; i++) {
3093       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3094       smycols += ourlens[i];
3095       svals   += ourlens[i];
3096       jj++;
3097     }
3098 
3099     /* read in the other processes' values and ship them out */
3100     for (i=1; i<size; i++) {
3101       nz   = procsnz[i];
3102       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3103       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3104     }
3105     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3106   } else {
3107     /* receive numeric values */
3108     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3109 
3110     /* receive message of values */
3111     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3112 
3113     /* insert into matrix */
3114     jj      = rstart;
3115     smycols = mycols;
3116     svals   = vals;
3117     for (i=0; i<m; i++) {
3118       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3119       smycols += ourlens[i];
3120       svals   += ourlens[i];
3121       jj++;
3122     }
3123   }
3124   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3125   ierr = PetscFree(vals);CHKERRQ(ierr);
3126   ierr = PetscFree(mycols);CHKERRQ(ierr);
3127   ierr = PetscFree(rowners);CHKERRQ(ierr);
3128   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3129   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3130   PetscFunctionReturn(0);
3131 }
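
/*
   The block size used while loading can be supplied on the command line, e.g.
   (hypothetical executable and file names):

     mpiexec -n 4 ./app -matload_block_size 2

   which is consumed by the PetscOptionsInt() call above before the (block) row
   ownership is computed.
*/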
3132 
3133 /* Not scalable because of ISAllGather() unless getting all columns. */
3134 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3135 {
3136   PetscErrorCode ierr;
3137   IS             iscol_local;
3138   PetscBool      isstride;
3139   PetscMPIInt    lisstride=0,gisstride;
3140 
3141   PetscFunctionBegin;
3142   /* check if we are grabbing all columns */
3143   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3144 
3145   if (isstride) {
3146     PetscInt  start,len,mstart,mlen;
3147     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3148     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3149     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3150     if (mstart == start && mlen-mstart == len) lisstride = 1;
3151   }
3152 
3153   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3154   if (gisstride) {
3155     PetscInt N;
3156     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3157     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3158     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3159     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3160   } else {
3161     PetscInt cbs;
3162     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3163     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3164     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3165   }
3166 
3167   *isseq = iscol_local;
3168   PetscFunctionReturn(0);
3169 }
3170 
3171 /*
3172  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3173  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3174 
3175  Input Parameters:
3176    mat - matrix
3177    isrow - parallel row index set; its local indices are a subset of local rows of mat,
3178            i.e., mat->rstart <= isrow[i] < mat->rend
3179    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3180            i.e., mat->cstart <= iscol[i] < mat->cend
3181  Output Parameters:
3182    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3183    iscol_o - sequential column index set for retrieving mat->B
3184    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3185  */
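/*
   A small worked example of the outputs (hypothetical data): with 2 processes,
   process 0 owning columns 0..3 and process 1 owning columns 4..7, let iscol
   be {1,2} on process 0 and {6} on process 1, so the concatenated iscol is
   {1,2,6}.  On process 0, iscol_d = {1,2} (local column indices into mat->A);
   if mat->B on process 0 has a column for global column 6, iscol_o holds that
   column's local position in B, and garray[0] = 2, the position of column 6
   in the concatenated iscol.
*/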
3186 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3187 {
3188   PetscErrorCode ierr;
3189   Vec            x,cmap;
3190   const PetscInt *is_idx;
3191   PetscScalar    *xarray,*cmaparray;
3192   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3193   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3194   Mat            B=a->B;
3195   Vec            lvec=a->lvec,lcmap;
3196   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3197   MPI_Comm       comm;
3198   VecScatter     Mvctx=a->Mvctx;
3199 
3200   PetscFunctionBegin;
3201   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3202   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3203 
3204   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3205   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3206   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3207   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3208   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3209 
3210   /* Get start indices */
3211   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3212   isstart -= ncols;
3213   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3214 
3215   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3216   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3217   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3218   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3219   for (i=0; i<ncols; i++) {
3220     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3221     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3222     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3223   }
3224   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3225   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3226   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3227 
3228   /* Get iscol_d */
3229   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3230   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3231   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3232 
3233   /* Get isrow_d */
3234   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3235   rstart = mat->rmap->rstart;
3236   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3237   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3238   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3239   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3240 
3241   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3242   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3243   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3244 
3245   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3246   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3247   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3248 
3249   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3250 
3251   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3252   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3253 
3254   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3255   /* off-process column indices */
3256   count = 0;
3257   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3258   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3259 
3260   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3261   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3262   for (i=0; i<Bn; i++) {
3263     if (PetscRealPart(xarray[i]) > -1.0) {
3264       idx[count]   = i;                                     /* local column index in off-diagonal part B */
3265       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
3266       count++;
3267     }
3268   }
3269   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3270   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3271 
3272   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3273   /* cannot ensure iscol_o has same blocksize as iscol! */
3274 
3275   ierr = PetscFree(idx);CHKERRQ(ierr);
3276   *garray = cmap1;
3277 
3278   ierr = VecDestroy(&x);CHKERRQ(ierr);
3279   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3280   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3281   PetscFunctionReturn(0);
3282 }
3283 
3284 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3285 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3286 {
3287   PetscErrorCode ierr;
3288   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3289   Mat            M = NULL;
3290   MPI_Comm       comm;
3291   IS             iscol_d,isrow_d,iscol_o;
3292   Mat            Asub = NULL,Bsub = NULL;
3293   PetscInt       n;
3294 
3295   PetscFunctionBegin;
3296   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3297 
3298   if (call == MAT_REUSE_MATRIX) {
3299     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3300     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3301     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3302 
3303     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3304     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3305 
3306     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3307     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3308 
3309     /* Update diagonal and off-diagonal portions of submat */
3310     asub = (Mat_MPIAIJ*)(*submat)->data;
3311     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3312     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3313     if (n) {
3314       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3315     }
3316     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3317     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3318 
3319   } else { /* call == MAT_INITIAL_MATRIX */
3320     const PetscInt *garray;
3321     PetscInt        BsubN;
3322 
3323     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3324     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3325 
3326     /* Create local submatrices Asub and Bsub */
3327     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3328     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3329 
3330     /* Create submatrix M */
3331     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3332 
3333     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3334     asub = (Mat_MPIAIJ*)M->data;
3335 
3336     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3337     n = asub->B->cmap->N;
3338     if (BsubN > n) {
3339       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3340       const PetscInt *idx;
3341       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3342       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3343 
3344       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3345       j = 0;
3346       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3347       for (i=0; i<n; i++) {
3348         if (j >= BsubN) break;
3349         while (subgarray[i] > garray[j]) j++;
3350 
3351         if (subgarray[i] == garray[j]) {
3352           idx_new[i] = idx[j++];
3353         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be less than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3354       }
3355       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3356 
3357       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3358       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3359 
3360     } else if (BsubN < n) {
3361       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be smaller than B's (%D)",BsubN,asub->B->cmap->N);
3362     }
3363 
3364     ierr = PetscFree(garray);CHKERRQ(ierr);
3365     *submat = M;
3366 
3367     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3368     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3369     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3370 
3371     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3372     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3373 
3374     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3375     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3376   }
3377   PetscFunctionReturn(0);
3378 }
3379 
3380 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3381 {
3382   PetscErrorCode ierr;
3383   IS             iscol_local=NULL,isrow_d;
3384   PetscInt       csize;
3385   PetscInt       n,i,j,start,end;
3386   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3387   MPI_Comm       comm;
3388 
3389   PetscFunctionBegin;
3390   /* If isrow has the same processor distribution as mat,
3391      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with the global size of iscol */
3392   if (call == MAT_REUSE_MATRIX) {
3393     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3394     if (isrow_d) {
3395       sameRowDist  = PETSC_TRUE;
3396       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3397     } else {
3398       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3399       if (iscol_local) {
3400         sameRowDist  = PETSC_TRUE;
3401         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3402       }
3403     }
3404   } else {
3405     /* Check if isrow has same processor distribution as mat */
3406     sameDist[0] = PETSC_FALSE;
3407     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3408     if (!n) {
3409       sameDist[0] = PETSC_TRUE;
3410     } else {
3411       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3412       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3413       if (i >= start && j < end) {
3414         sameDist[0] = PETSC_TRUE;
3415       }
3416     }
3417 
3418     /* Check if iscol has same processor distribution as mat */
3419     sameDist[1] = PETSC_FALSE;
3420     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3421     if (!n) {
3422       sameDist[1] = PETSC_TRUE;
3423     } else {
3424       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3425       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3426       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3427     }
3428 
3429     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3430     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3431     sameRowDist = tsameDist[0];
3432   }
3433 
3434   if (sameRowDist) {
3435     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3436       /* isrow and iscol have same processor distribution as mat */
3437       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3438       PetscFunctionReturn(0);
3439     } else { /* sameRowDist */
3440       /* isrow has same processor distribution as mat */
3441       if (call == MAT_INITIAL_MATRIX) {
3442         PetscBool sorted;
3443         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3444         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3445         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3446         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3447 
3448         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3449         if (sorted) {
3450           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3451           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3452           PetscFunctionReturn(0);
3453         }
3454       } else { /* call == MAT_REUSE_MATRIX */
3455         IS    iscol_sub;
3456         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3457         if (iscol_sub) {
3458           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3459           PetscFunctionReturn(0);
3460         }
3461       }
3462     }
3463   }
3464 
3465   /* General case: iscol -> iscol_local which has global size of iscol */
3466   if (call == MAT_REUSE_MATRIX) {
3467     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3468     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3469   } else {
3470     if (!iscol_local) {
3471       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3472     }
3473   }
3474 
3475   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3476   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3477 
3478   if (call == MAT_INITIAL_MATRIX) {
3479     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3480     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3481   }
3482   PetscFunctionReturn(0);
3483 }
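
/*
   A minimal calling sketch (assumes isrow and iscol are parallel index sets
   that conform to mat's row and column ownership, so the SameRowColDist path
   above is taken):

     Mat sub;
     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
     ierr = MatDestroy(&sub);CHKERRQ(ierr);

   The second call reuses the index sets composed on sub by the first call;
   mat must keep the same nonzero pattern between the two calls.
*/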
3484 
3485 /*@C
3486      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3487          and "off-diagonal" part of the matrix in CSR format.
3488 
3489    Collective on MPI_Comm
3490 
3491    Input Parameters:
3492 +  comm - MPI communicator
3493 .  A - "diagonal" portion of matrix
3494 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3495 -  garray - global index of B columns
3496 
3497    Output Parameter:
3498 .   mat - the matrix, with input A as its local diagonal matrix
3499    Level: advanced
3500 
3501    Notes:
3502        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3503        A becomes part of the output mat; B is destroyed by this routine. The user cannot use A or B afterwards.
3504 
3505 .seealso: MatCreateMPIAIJWithSplitArrays()
3506 @*/
3507 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3508 {
3509   PetscErrorCode ierr;
3510   Mat_MPIAIJ     *maij;
3511   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3512   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3513   PetscScalar    *oa=b->a;
3514   Mat            Bnew;
3515   PetscInt       m,n,N;
3516 
3517   PetscFunctionBegin;
3518   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3519   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3520   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3521   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3522   /* the check below is disabled; when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3523   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3524 
3525   /* Get global columns of mat */
3526   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3527 
3528   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3529   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3530   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3531   maij = (Mat_MPIAIJ*)(*mat)->data;
3532 
3533   (*mat)->preallocated = PETSC_TRUE;
3534 
3535   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3536   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3537 
3538   /* Set A as diagonal portion of *mat */
3539   maij->A = A;
3540 
3541   nz = oi[m];
3542   for (i=0; i<nz; i++) {
3543     col   = oj[i];
3544     oj[i] = garray[col];
3545   }
3546 
3547    /* Set Bnew as off-diagonal portion of *mat */
3548   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3549   bnew        = (Mat_SeqAIJ*)Bnew->data;
3550   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3551   maij->B     = Bnew;
3552 
3553   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3554 
3555   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3556   b->free_a       = PETSC_FALSE;
3557   b->free_ij      = PETSC_FALSE;
3558   ierr = MatDestroy(&B);CHKERRQ(ierr);
3559 
3560   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3561   bnew->free_a       = PETSC_TRUE;
3562   bnew->free_ij      = PETSC_TRUE;
3563 
3564   /* condense columns of maij->B */
3565   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3566   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3567   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3568   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3569   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3570   PetscFunctionReturn(0);
3571 }
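
/*
   A minimal calling sketch (assumes Ad and Ao are local MATSEQAIJ matrices
   with the same number of rows, and garray[] maps Ao's columns to global
   columns):

     Mat C;
     ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Ad,Ao,garray,&C);CHKERRQ(ierr);

   Afterwards Ad is owned by C and Ao has been destroyed, so neither may be
   used (or destroyed) by the caller.
*/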
3572 
3573 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3574 
3575 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3576 {
3577   PetscErrorCode ierr;
3578   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3579   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3580   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3581   Mat            M,Msub,B=a->B;
3582   MatScalar      *aa;
3583   Mat_SeqAIJ     *aij;
3584   PetscInt       *garray = a->garray,*colsub,Ncols;
3585   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3586   IS             iscol_sub,iscmap;
3587   const PetscInt *is_idx,*cmap;
3588   PetscBool      allcolumns=PETSC_FALSE;
3589   MPI_Comm       comm;
3590 
3591   PetscFunctionBegin;
3592   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3593 
3594   if (call == MAT_REUSE_MATRIX) {
3595     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3596     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3597     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3598 
3599     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3600     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3601 
3602     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3603     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3604 
3605     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3606 
3607   } else { /* call == MAT_INITIAL_MATRIX */
3608     PetscBool flg;
3609 
3610     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3611     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3612 
3613     /* (1) iscol -> nonscalable iscol_local */
3614     /* Check for special case: each processor gets entire matrix columns */
3615     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3616     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3617     if (allcolumns) {
3618       iscol_sub = iscol_local;
3619       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3620       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3621 
3622     } else {
3623       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3624       PetscInt *idx,*cmap1,k;
3625       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3626       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3627       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3628       count = 0;
3629       k     = 0;
3630       for (i=0; i<Ncols; i++) {
3631         j = is_idx[i];
3632         if (j >= cstart && j < cend) {
3633           /* diagonal part of mat */
3634           idx[count]     = j;
3635           cmap1[count++] = i; /* column index in submat */
3636         } else if (Bn) {
3637           /* off-diagonal part of mat */
3638           if (j == garray[k]) {
3639             idx[count]     = j;
3640             cmap1[count++] = i;  /* column index in submat */
3641           } else if (j > garray[k]) {
3642             while (j > garray[k] && k < Bn-1) k++;
3643             if (j == garray[k]) {
3644               idx[count]     = j;
3645               cmap1[count++] = i; /* column index in submat */
3646             }
3647           }
3648         }
3649       }
3650       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3651 
3652       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3653       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3654       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3655 
3656       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3657     }
3658 
3659     /* (3) Create sequential Msub */
3660     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3661   }
3662 
3663   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3664   aij  = (Mat_SeqAIJ*)(Msub)->data;
3665   ii   = aij->i;
3666   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3667 
3668   /*
3669       m - number of local rows
3670       Ncols - number of columns (same on all processors)
3671       rstart - first row in new global matrix generated
3672   */
3673   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3674 
3675   if (call == MAT_INITIAL_MATRIX) {
3676     /* (4) Create parallel newmat */
3677     PetscMPIInt    rank,size;
3678     PetscInt       csize;
3679 
3680     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3681     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3682 
3683     /*
3684         Determine the number of non-zeros in the diagonal and off-diagonal
3685         portions of the matrix in order to do correct preallocation
3686     */
3687 
3688     /* first get start and end of "diagonal" columns */
3689     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3690     if (csize == PETSC_DECIDE) {
3691       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3692       if (mglobal == Ncols) { /* square matrix */
3693         nlocal = m;
3694       } else {
3695         nlocal = Ncols/size + ((Ncols % size) > rank);
3696       }
3697     } else {
3698       nlocal = csize;
3699     }
3700     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3701     rstart = rend - nlocal;
3702     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3703 
3704     /* next, compute all the lengths */
3705     jj    = aij->j;
3706     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3707     olens = dlens + m;
3708     for (i=0; i<m; i++) {
3709       jend = ii[i+1] - ii[i];
3710       olen = 0;
3711       dlen = 0;
3712       for (j=0; j<jend; j++) {
3713         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3714         else dlen++;
3715         jj++;
3716       }
3717       olens[i] = olen;
3718       dlens[i] = dlen;
3719     }
3720 
3721     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3722     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3723 
3724     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3725     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3726     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3727     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3728     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3729     ierr = PetscFree(dlens);CHKERRQ(ierr);
3730 
3731   } else { /* call == MAT_REUSE_MATRIX */
3732     M    = *newmat;
3733     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3734     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3735     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3736     /*
3737          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3738        rather than the slower MatSetValues().
3739     */
3740     M->was_assembled = PETSC_TRUE;
3741     M->assembled     = PETSC_FALSE;
3742   }
3743 
3744   /* (5) Set values of Msub to *newmat */
3745   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3746   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3747 
3748   jj   = aij->j;
3749   aa   = aij->a;
3750   for (i=0; i<m; i++) {
3751     row = rstart + i;
3752     nz  = ii[i+1] - ii[i];
3753     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3754     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3755     jj += nz; aa += nz;
3756   }
3757   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3758 
3759   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3760   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3761 
3762   ierr = PetscFree(colsub);CHKERRQ(ierr);
3763 
3764   /* save Msub, iscol_sub and iscmap used in processor for next request */
3765   if (call ==  MAT_INITIAL_MATRIX) {
3766     *newmat = M;
3767     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3768     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3769 
3770     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3771     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3772 
3773     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3774     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3775 
3776     if (iscol_local) {
3777       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3778       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3779     }
3780   }
3781   PetscFunctionReturn(0);
3782 }
3783 
3784 /*
3785     Not great since it makes two copies of the submatrix: first a local SeqAIJ,
3786   and then the final result by concatenating the local matrices.
3787   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3788 
3789   Note: This requires a sequential iscol with all indices.
3790 */
3791 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3792 {
3793   PetscErrorCode ierr;
3794   PetscMPIInt    rank,size;
3795   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3796   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3797   Mat            M,Mreuse;
3798   MatScalar      *aa,*vwork;
3799   MPI_Comm       comm;
3800   Mat_SeqAIJ     *aij;
3801   PetscBool      colflag,allcolumns=PETSC_FALSE;
3802 
3803   PetscFunctionBegin;
3804   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3805   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3806   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3807 
3808   /* Check for special case: each processor gets entire matrix columns */
3809   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3810   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3811   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3812 
3813   if (call ==  MAT_REUSE_MATRIX) {
3814     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3815     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3816     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3817   } else {
3818     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3819   }
3820 
3821   /*
3822       m - number of local rows
3823       n - number of columns (same on all processors)
3824       rstart - first row in new global matrix generated
3825   */
3826   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3827   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3828   if (call == MAT_INITIAL_MATRIX) {
3829     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3830     ii  = aij->i;
3831     jj  = aij->j;
3832 
3833     /*
3834         Determine the number of non-zeros in the diagonal and off-diagonal
3835         portions of the matrix in order to do correct preallocation
3836     */
3837 
3838     /* first get start and end of "diagonal" columns */
3839     if (csize == PETSC_DECIDE) {
3840       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3841       if (mglobal == n) { /* square matrix */
3842         nlocal = m;
3843       } else {
3844         nlocal = n/size + ((n % size) > rank);
3845       }
3846     } else {
3847       nlocal = csize;
3848     }
3849     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3850     rstart = rend - nlocal;
3851     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3852 
3853     /* next, compute all the lengths */
3854     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3855     olens = dlens + m;
3856     for (i=0; i<m; i++) {
3857       jend = ii[i+1] - ii[i];
3858       olen = 0;
3859       dlen = 0;
3860       for (j=0; j<jend; j++) {
3861         if (*jj < rstart || *jj >= rend) olen++;
3862         else dlen++;
3863         jj++;
3864       }
3865       olens[i] = olen;
3866       dlens[i] = dlen;
3867     }
3868     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3869     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3870     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3871     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3872     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3873     ierr = PetscFree(dlens);CHKERRQ(ierr);
3874   } else {
3875     PetscInt ml,nl;
3876 
3877     M    = *newmat;
3878     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3879     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3880     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3881     /*
3882          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3883        rather than the slower MatSetValues().
3884     */
3885     M->was_assembled = PETSC_TRUE;
3886     M->assembled     = PETSC_FALSE;
3887   }
3888   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3889   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3890   ii   = aij->i;
3891   jj   = aij->j;
3892   aa   = aij->a;
3893   for (i=0; i<m; i++) {
3894     row   = rstart + i;
3895     nz    = ii[i+1] - ii[i];
3896     cwork = jj;     jj += nz;
3897     vwork = aa;     aa += nz;
3898     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3899   }
3900 
3901   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3902   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3903   *newmat = M;
3904 
3905   /* save submatrix used in processor for next request */
3906   if (call ==  MAT_INITIAL_MATRIX) {
3907     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3908     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3909   }
3910   PetscFunctionReturn(0);
3911 }
3912 
3913 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3914 {
3915   PetscInt       m,cstart, cend,j,nnz,i,d;
3916   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3917   const PetscInt *JJ;
3918   PetscScalar    *values;
3919   PetscErrorCode ierr;
3920   PetscBool      nooffprocentries;
3921 
3922   PetscFunctionBegin;
3923   if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3924 
3925   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3926   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3927   m      = B->rmap->n;
3928   cstart = B->cmap->rstart;
3929   cend   = B->cmap->rend;
3930   rstart = B->rmap->rstart;
3931 
3932   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3933 
3934 #if defined(PETSC_USE_DEBUG)
3935   for (i=0; i<m && Ii; i++) {
3936     nnz = Ii[i+1]- Ii[i];
3937     JJ  = J + Ii[i];
3938     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3939     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3940     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3941   }
3942 #endif
3943 
3944   for (i=0; i<m && Ii; i++) {
3945     nnz     = Ii[i+1]- Ii[i];
3946     JJ      = J + Ii[i];
3947     nnz_max = PetscMax(nnz_max,nnz);
3948     d       = 0;
3949     for (j=0; j<nnz; j++) {
3950       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3951     }
3952     d_nnz[i] = d;
3953     o_nnz[i] = nnz - d;
3954   }
3955   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3956   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3957 
3958   if (v) values = (PetscScalar*)v;
3959   else {
3960     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3961   }
3962 
3963   for (i=0; i<m && Ii; i++) {
3964     ii   = i + rstart;
3965     nnz  = Ii[i+1]- Ii[i];
3966     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3967   }
3968   nooffprocentries    = B->nooffprocentries;
3969   B->nooffprocentries = PETSC_TRUE;
3970   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3971   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3972   B->nooffprocentries = nooffprocentries;
3973 
3974   if (!v) {
3975     ierr = PetscFree(values);CHKERRQ(ierr);
3976   }
3977   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3978   PetscFunctionReturn(0);
3979 }
3980 
3981 /*@
3982    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3983    (the default parallel PETSc format).
3984 
3985    Collective on MPI_Comm
3986 
3987    Input Parameters:
3988 +  B - the matrix
3989 .  i - the indices into j for the start of each local row (starts with zero)
3990 .  j - the column indices for each local row (starts with zero)
3991 -  v - optional values in the matrix
3992 
3993    Level: developer
3994 
3995    Notes:
3996        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3997      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3998      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3999 
4000        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4001 
4002        The format used for the sparse matrix input is equivalent to a
4003     row-major ordering, i.e., for the following matrix, the expected input data is
4004     as shown:
4005 
4006 $        1 0 0
4007 $        2 0 3     P0
4008 $       -------
4009 $        4 5 6     P1
4010 $
4011 $     Process0 [P0]: rows_owned=[0,1]
4012 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4013 $        j =  {0,0,2}  [size = 3]
4014 $        v =  {1,2,3}  [size = 3]
4015 $
4016 $     Process1 [P1]: rows_owned=[2]
4017 $        i =  {0,3}    [size = nrow+1  = 1+1]
4018 $        j =  {0,1,2}  [size = 3]
4019 $        v =  {4,5,6}  [size = 3]
4020 
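   As a minimal calling sketch for the layout above (i, j, v are the
   per-process arrays shown; mlocal is 2 on P0 and 1 on P1; error checking
   omitted). Note that, as the implementation above shows, this call also
   inserts the values and assembles the matrix:

.vb
     Mat B;
     MatCreate(comm,&B);
     MatSetSizes(B,mlocal,PETSC_DECIDE,PETSC_DETERMINE,3);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve
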
4021 .keywords: matrix, aij, compressed row, sparse, parallel
4022 
4023 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
4024           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
4025 @*/
4026 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
4027 {
4028   PetscErrorCode ierr;
4029 
4030   PetscFunctionBegin;
4031   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
4032   PetscFunctionReturn(0);
4033 }
4034 
4035 /*@C
4036    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4037    (the default parallel PETSc format).  For good matrix assembly performance
4038    the user should preallocate the matrix storage by setting the parameters
4039    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4040    performance can be increased by more than a factor of 50.
4041 
4042    Collective on MPI_Comm
4043 
4044    Input Parameters:
4045 +  B - the matrix
4046 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4047            (same value is used for all local rows)
4048 .  d_nnz - array containing the number of nonzeros in the various rows of the
4049            DIAGONAL portion of the local submatrix (possibly different for each row)
4050            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4051            The size of this array is equal to the number of local rows, i.e 'm'.
4052            For matrices that will be factored, you must leave room for (and set)
4053            the diagonal entry even if it is zero.
4054 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4055            submatrix (same value is used for all local rows).
4056 -  o_nnz - array containing the number of nonzeros in the various rows of the
4057            OFF-DIAGONAL portion of the local submatrix (possibly different for
4058            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4059            structure. The size of this array is equal to the number
4060            of local rows, i.e 'm'.
4061 
4062    If the *_nnz parameter is given, then the *_nz parameter is ignored.
4063 
4064    The AIJ format (also called the Yale sparse matrix format or
4065    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4066    storage.  The stored row and column indices begin with zero.
4067    See Users-Manual: ch_mat for details.
4068 
4069    The parallel matrix is partitioned such that the first m0 rows belong to
4070    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4071    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
4072 
4073    The DIAGONAL portion of the local submatrix of a processor can be defined
4074    as the submatrix obtained by extracting the part corresponding to
4075    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4076    first row that belongs to the processor, r2 is the last row belonging to
4077    this processor, and c1-c2 is the range of indices of the local part of a
4078    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
4079    common case of a square matrix, the row and column ranges are the same and
4080    the DIAGONAL part is also square. The remaining portion of the local
4081    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4082 
4083    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4084 
4085    You can call MatGetInfo() to get information on how effective the preallocation was,
4086    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
4087    You can also run with the option -info and look for messages with the string
4088    malloc in them to see if additional memory allocation was needed.
4089 
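   A sketch of such a check, assuming an assembled matrix A (field names as
   defined by MatInfo):

.vb
     MatInfo info;
     MatGetInfo(A,MAT_LOCAL,&info);
     PetscPrintf(PETSC_COMM_SELF,"mallocs %g nz_allocated %g nz_used %g nz_unneeded %g\n",
                 info.mallocs,info.nz_allocated,info.nz_used,info.nz_unneeded);
.ve
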
4090    Example usage:
4091 
4092    Consider the following 8x8 matrix with 34 non-zero values, that is
4093    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4094    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4095    as follows:
4096 
4097 .vb
4098             1  2  0  |  0  3  0  |  0  4
4099     Proc0   0  5  6  |  7  0  0  |  8  0
4100             9  0 10  | 11  0  0  | 12  0
4101     -------------------------------------
4102            13  0 14  | 15 16 17  |  0  0
4103     Proc1   0 18  0  | 19 20 21  |  0  0
4104             0  0  0  | 22 23  0  | 24  0
4105     -------------------------------------
4106     Proc2  25 26 27  |  0  0 28  | 29  0
4107            30  0  0  | 31 32 33  |  0 34
4108 .ve
4109 
4110    This can be represented as a collection of submatrices as:
4111 
4112 .vb
4113       A B C
4114       D E F
4115       G H I
4116 .ve
4117 
4118    Where the submatrices A,B,C are owned by proc0, D,E,F are
4119    owned by proc1, G,H,I are owned by proc2.
4120 
4121    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4122    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4123    The 'M','N' parameters are 8,8, and have the same values on all procs.
4124 
4125    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4126    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4127    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4128    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4129    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4130    matrix, and [DF] as another SeqAIJ matrix.
4131 
4132    When d_nz, o_nz parameters are specified, d_nz storage elements are
4133    allocated for every row of the local diagonal submatrix, and o_nz
4134    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4135    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
4136    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4137    In this case, the values of d_nz,o_nz are:
4138 .vb
4139      proc0 : d_nz = 2, o_nz = 2
4140      proc1 : d_nz = 3, o_nz = 2
4141      proc2 : d_nz = 1, o_nz = 4
4142 .ve
4143    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4144    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4145    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4146    34 values.
4147 
4148    When d_nnz, o_nnz parameters are specified, the storage is specified
4149    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4150    In the above case the values for d_nnz,o_nnz are:
4151 .vb
4152      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4153      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4154      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4155 .ve
4156    Here the space allocated is the sum of all the above values, i.e., 34, and
4157    hence the preallocation is perfect.
4158 
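   As a sketch, the calls on proc0 for this example could look as follows
   (each rank passes its own local sizes and nnz arrays; error checking
   omitted):

.vb
     Mat      A;
     PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,3,3,8,8);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve
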
4159    Level: intermediate
4160 
4161 .keywords: matrix, aij, compressed row, sparse, parallel
4162 
4163 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4164           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4165 @*/
4166 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4167 {
4168   PetscErrorCode ierr;
4169 
4170   PetscFunctionBegin;
4171   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4172   PetscValidType(B,1);
4173   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4174   PetscFunctionReturn(0);
4175 }
4176 
4177 /*@
4178      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4179          CSR format the local rows.
4180 
4181    Collective on MPI_Comm
4182 
4183    Input Parameters:
4184 +  comm - MPI communicator
4185 .  m - number of local rows (Cannot be PETSC_DECIDE)
4186 .  n - This value should be the same as the local size used in creating the
4187        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4188        calculated if N is given). For square matrices n is almost always m.
4189 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4190 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4191 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4192 .   j - column indices
4193 -   a - matrix values
4194 
4195    Output Parameter:
4196 .   mat - the matrix
4197 
4198    Level: intermediate
4199 
4200    Notes:
4201        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4202      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4203      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4204 
4205        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4206 
4207        The format used for the sparse matrix input is equivalent to a
4208     row-major ordering, i.e., for the following matrix, the expected input data is
4209     as shown:
4210 
4211 $        1 0 0
4212 $        2 0 3     P0
4213 $       -------
4214 $        4 5 6     P1
4215 $
4216 $     Process0 [P0]: rows_owned=[0,1]
4217 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4218 $        j =  {0,0,2}  [size = 3]
4219 $        v =  {1,2,3}  [size = 3]
4220 $
4221 $     Process1 [P1]: rows_owned=[2]
4222 $        i =  {0,3}    [size = nrow+1  = 1+1]
4223 $        j =  {0,1,2}  [size = 3]
4224 $        v =  {4,5,6}  [size = 3]
4225 
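   As a sketch, process 0 above could create the matrix with (i, j, v are its
   arrays listed above; m=2 local rows, N=3 global columns):

.vb
     Mat A;
     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
.ve
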
4226 .keywords: matrix, aij, compressed row, sparse, parallel
4227 
4228 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4229           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4230 @*/
4231 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4232 {
4233   PetscErrorCode ierr;
4234 
4235   PetscFunctionBegin;
4236   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4237   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4238   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4239   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4240   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4241   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4242   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4243   PetscFunctionReturn(0);
4244 }
4245 
4246 /*@C
4247    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4248    (the default parallel PETSc format).  For good matrix assembly performance
4249    the user should preallocate the matrix storage by setting the parameters
4250    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4251    performance can be increased by more than a factor of 50.
4252 
4253    Collective on MPI_Comm
4254 
4255    Input Parameters:
4256 +  comm - MPI communicator
4257 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
4258            This value should be the same as the local size used in creating the
4259            y vector for the matrix-vector product y = Ax.
4260 .  n - This value should be the same as the local size used in creating the
4261        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4262        calculated if N is given). For square matrices n is almost always m.
4263 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4264 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4265 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4266            (same value is used for all local rows)
4267 .  d_nnz - array containing the number of nonzeros in the various rows of the
4268            DIAGONAL portion of the local submatrix (possibly different for each row)
4269            or NULL, if d_nz is used to specify the nonzero structure.
4270            The size of this array is equal to the number of local rows, i.e 'm'.
4271 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4272            submatrix (same value is used for all local rows).
4273 -  o_nnz - array containing the number of nonzeros in the various rows of the
4274            OFF-DIAGONAL portion of the local submatrix (possibly different for
4275            each row) or NULL, if o_nz is used to specify the nonzero
4276            structure. The size of this array is equal to the number
4277            of local rows, i.e 'm'.
4278 
4279    Output Parameter:
4280 .  A - the matrix
4281 
4282    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4283    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4284    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4285 
4286    Notes:
4287    If the *_nnz parameter is given then the *_nz parameter is ignored
4288 
4289    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4290    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4291    storage requirements for this matrix.
4292 
4293    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4294    processor then it must be used on all processors that share the object for
4295    that argument.
4296 
4297    The user MUST specify either the local or global matrix dimensions
4298    (possibly both).
4299 
4300    The parallel matrix is partitioned across processors such that the
4301    first m0 rows belong to process 0, the next m1 rows belong to
4302    process 1, the next m2 rows belong to process 2, etc., where
4303    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4304    values corresponding to an [m x N] submatrix.
4305 
4306    The columns are logically partitioned with the n0 columns belonging
4307    to 0th partition, the next n1 columns belonging to the next
4308    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4309 
4310    The DIAGONAL portion of the local submatrix on any given processor
4311    is the submatrix corresponding to the rows and columns m,n
4312    owned by the given processor, i.e., the diagonal matrix on
4313    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4314    etc. The remaining portion of the local submatrix [m x (N-n)]
4315    constitutes the OFF-DIAGONAL portion. The example below better
4316    illustrates this concept.
4317 
4318    For a square global matrix we define each processor's diagonal portion
4319    to be its local rows and the corresponding columns (a square submatrix);
4320    each processor's off-diagonal portion encompasses the remainder of the
4321    local matrix (a rectangular submatrix).
4322 
4323    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4324 
4325    When calling this routine with a single process communicator, a matrix of
4326    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4327    type of communicator, use the construction mechanism
4328 .vb
4329      MatCreate(...,&A);
4330      MatSetType(A,MATMPIAIJ);
4331      MatSetSizes(A, m,n,M,N);
4332      MatMPIAIJSetPreallocation(A,...);
4333 .ve
4336 
4337    By default, this format uses inodes (identical nodes) when possible.
4338    We search for consecutive rows with the same nonzero structure, thereby
4339    reusing matrix information to achieve increased efficiency.
4340 
4341    Options Database Keys:
4342 +  -mat_no_inode  - Do not use inodes
4343 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4344 
4347    Example usage:
4348 
4349    Consider the following 8x8 matrix with 34 non-zero values, that is
4350    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4351    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4352    as follows:
4353 
4354 .vb
4355             1  2  0  |  0  3  0  |  0  4
4356     Proc0   0  5  6  |  7  0  0  |  8  0
4357             9  0 10  | 11  0  0  | 12  0
4358     -------------------------------------
4359            13  0 14  | 15 16 17  |  0  0
4360     Proc1   0 18  0  | 19 20 21  |  0  0
4361             0  0  0  | 22 23  0  | 24  0
4362     -------------------------------------
4363     Proc2  25 26 27  |  0  0 28  | 29  0
4364            30  0  0  | 31 32 33  |  0 34
4365 .ve
4366 
4367    This can be represented as a collection of submatrices as
4368 
4369 .vb
4370       A B C
4371       D E F
4372       G H I
4373 .ve
4374 
4375    Where the submatrices A,B,C are owned by proc0, D,E,F are
4376    owned by proc1, G,H,I are owned by proc2.
4377 
4378    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4379    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4380    The 'M','N' parameters are 8,8, and have the same values on all procs.
4381 
4382    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4383    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4384    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4385    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4386    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4387    matrix, and [DF] as another SeqAIJ matrix.
4388 
4389    When d_nz, o_nz parameters are specified, d_nz storage elements are
4390    allocated for every row of the local diagonal submatrix, and o_nz
4391    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4392    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
4393    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4394    In this case, the values of d_nz,o_nz are
4395 .vb
4396      proc0 : d_nz = 2, o_nz = 2
4397      proc1 : d_nz = 3, o_nz = 2
4398      proc2 : d_nz = 1, o_nz = 4
4399 .ve
4400    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4401    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4402    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4403    34 values.
4404 
4405    When d_nnz, o_nnz parameters are specified, the storage is specified
4406    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4407    In the above case the values for d_nnz,o_nnz are
4408 .vb
4409      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4410      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4411      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4412 .ve
4413    Here the space allocated is the sum of all the above values, i.e., 34, and
4414    hence the preallocation is perfect.
4415 
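   As a sketch, the call on proc1 for this example could be (the other ranks
   pass their own local sizes and nnz arrays):

.vb
     Mat      A;
     PetscInt d_nnz[3] = {3,3,2},o_nnz[3] = {2,1,1};
     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
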
4416    Level: intermediate
4417 
4418 .keywords: matrix, aij, compressed row, sparse, parallel
4419 
4420 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4421           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4422 @*/
4423 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4424 {
4425   PetscErrorCode ierr;
4426   PetscMPIInt    size;
4427 
4428   PetscFunctionBegin;
4429   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4430   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4431   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4432   if (size > 1) {
4433     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4434     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4435   } else {
4436     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4437     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4438   }
4439   PetscFunctionReturn(0);
4440 }
4441 
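/*
   MatMPIAIJGetSeqAIJ - gives access to the diagonal (Ad) and off-diagonal (Ao)
   SeqAIJ blocks of a MATMPIAIJ matrix, plus the colmap array that translates
   the compressed column numbering of Ao to global column indices; any of the
   output arguments may be NULL if it is not needed.
*/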
4442 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4443 {
4444   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4445   PetscBool      flg;
4446   PetscErrorCode ierr;
4447 
4448   PetscFunctionBegin;
4449   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4450   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4451   if (Ad)     *Ad     = a->A;
4452   if (Ao)     *Ao     = a->B;
4453   if (colmap) *colmap = a->garray;
4454   PetscFunctionReturn(0);
4455 }
4456 
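/*
   MatCreateMPIMatConcatenateSeqMat_MPIAIJ - stacks the rows of the sequential
   matrix inmat owned by each process of comm into a single parallel AIJ matrix;
   the symbolic phase (MAT_INITIAL_MATRIX) determines the column layout and the
   preallocation, after which the values are inserted row by row.
*/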
4457 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4458 {
4459   PetscErrorCode ierr;
4460   PetscInt       m,N,i,rstart,nnz,Ii;
4461   PetscInt       *indx;
4462   PetscScalar    *values;
4463 
4464   PetscFunctionBegin;
4465   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4466   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4467     PetscInt       *dnz,*onz,sum,bs,cbs;
4468 
4469     if (n == PETSC_DECIDE) {
4470       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4471     }
4472     /* Check sum(n) = N */
4473     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4474     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4475 
4476     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4477     rstart -= m;
4478 
4479     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4480     for (i=0; i<m; i++) {
4481       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4482       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4483       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4484     }
4485 
4486     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4487     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4488     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4489     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4490     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4491     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4492     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4493     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4494   }
4495 
4496   /* numeric phase */
4497   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4498   for (i=0; i<m; i++) {
4499     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4500     Ii   = i + rstart;
4501     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4502     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4503   }
4504   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4505   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4506   PetscFunctionReturn(0);
4507 }
4508 
4509 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4510 {
4511   PetscErrorCode    ierr;
4512   PetscMPIInt       rank;
4513   PetscInt          m,N,i,rstart,nnz;
4514   size_t            len;
4515   const PetscInt    *indx;
4516   PetscViewer       out;
4517   char              *name;
4518   Mat               B;
4519   const PetscScalar *values;
4520 
4521   PetscFunctionBegin;
4522   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4523   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4524   /* Should this be the type of the diagonal block of A? */
4525   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4526   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4527   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4528   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4529   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4530   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4531   for (i=0; i<m; i++) {
4532     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4533     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4534     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4535   }
4536   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4537   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4538 
4539   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4540   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4541   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4542   sprintf(name,"%s.%d",outfile,rank);
4543   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4544   ierr = PetscFree(name);CHKERRQ(ierr);
4545   ierr = MatView(B,out);CHKERRQ(ierr);
4546   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4547   ierr = MatDestroy(&B);CHKERRQ(ierr);
4548   PetscFunctionReturn(0);
4549 }
4550 
4551 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4552 {
4553   PetscErrorCode      ierr;
4554   Mat_Merge_SeqsToMPI *merge;
4555   PetscContainer      container;
4556 
4557   PetscFunctionBegin;
4558   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4559   if (container) {
4560     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4561     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4562     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4563     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4564     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4565     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4566     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4567     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4568     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4569     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4570     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4571     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4572     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4573     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4574     ierr = PetscFree(merge);CHKERRQ(ierr);
4575     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4576   }
4577   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4578   PetscFunctionReturn(0);
4579 }
4580 
4581 #include <../src/mat/utils/freespace.h>
4582 #include <petscbt.h>
4583 
4584 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4585 {
4586   PetscErrorCode      ierr;
4587   MPI_Comm            comm;
4588   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4589   PetscMPIInt         size,rank,taga,*len_s;
4590   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4591   PetscInt            proc,m;
4592   PetscInt            **buf_ri,**buf_rj;
4593   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4594   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4595   MPI_Request         *s_waits,*r_waits;
4596   MPI_Status          *status;
4597   MatScalar           *aa=a->a;
4598   MatScalar           **abuf_r,*ba_i;
4599   Mat_Merge_SeqsToMPI *merge;
4600   PetscContainer      container;
4601 
4602   PetscFunctionBegin;
4603   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4604   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4605 
4606   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4607   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4608 
4609   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4610   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4611 
4612   bi     = merge->bi;
4613   bj     = merge->bj;
4614   buf_ri = merge->buf_ri;
4615   buf_rj = merge->buf_rj;
4616 
4617   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4618   owners = merge->rowmap->range;
4619   len_s  = merge->len_s;
4620 
4621   /* send and recv matrix values */
4622   /*-----------------------------*/
4623   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4624   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4625 
4626   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4627   for (proc=0,k=0; proc<size; proc++) {
4628     if (!len_s[proc]) continue;
4629     i    = owners[proc];
4630     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4631     k++;
4632   }
4633 
4634   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4635   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4636   ierr = PetscFree(status);CHKERRQ(ierr);
4637 
4638   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4639   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4640 
4641   /* insert mat values of mpimat */
4642   /*----------------------------*/
4643   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4644   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4645 
4646   for (k=0; k<merge->nrecv; k++) {
4647     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
4648     nrows       = *(buf_ri_k[k]);
4649     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of the k-th received i-structure */
4650     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4651   }
4652 
4653   /* set values of ba */
4654   m = merge->rowmap->n;
4655   for (i=0; i<m; i++) {
4656     arow = owners[rank] + i;
4657     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4658     bnzi = bi[i+1] - bi[i];
4659     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4660 
4661     /* add local non-zero vals of this proc's seqmat into ba */
4662     anzi   = ai[arow+1] - ai[arow];
4663     aj     = a->j + ai[arow];
4664     aa     = a->a + ai[arow];
4665     nextaj = 0;
4666     for (j=0; nextaj<anzi; j++) {
4667       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4668         ba_i[j] += aa[nextaj++];
4669       }
4670     }
4671 
4672     /* add received vals into ba */
4673     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4674       /* i-th row */
4675       if (i == *nextrow[k]) {
4676         anzi   = *(nextai[k]+1) - *nextai[k];
4677         aj     = buf_rj[k] + *(nextai[k]);
4678         aa     = abuf_r[k] + *(nextai[k]);
4679         nextaj = 0;
4680         for (j=0; nextaj<anzi; j++) {
4681           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4682             ba_i[j] += aa[nextaj++];
4683           }
4684         }
4685         nextrow[k]++; nextai[k]++;
4686       }
4687     }
4688     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4689   }
4690   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4691   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4692 
4693   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4694   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4695   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4696   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4697   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4698   PetscFunctionReturn(0);
4699 }
4700 
4701 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4702 {
4703   PetscErrorCode      ierr;
4704   Mat                 B_mpi;
4705   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4706   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4707   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4708   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4709   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4710   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4711   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4712   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4713   MPI_Status          *status;
4714   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4715   PetscBT             lnkbt;
4716   Mat_Merge_SeqsToMPI *merge;
4717   PetscContainer      container;
4718 
4719   PetscFunctionBegin;
4720   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4721 
4722   /* make sure it is a PETSc comm */
4723   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4724   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4725   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4726 
4727   ierr = PetscNew(&merge);CHKERRQ(ierr);
4728   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4729 
4730   /* determine row ownership */
4731   /*---------------------------------------------------------*/
4732   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4733   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4734   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4735   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4736   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4737   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4738   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4739 
4740   m      = merge->rowmap->n;
4741   owners = merge->rowmap->range;
4742 
4743   /* determine the number of messages to send, their lengths */
4744   /*---------------------------------------------------------*/
4745   len_s = merge->len_s;
4746 
4747   len          = 0; /* length of buf_si[] */
4748   merge->nsend = 0;
4749   for (proc=0; proc<size; proc++) {
4750     len_si[proc] = 0;
4751     if (proc == rank) {
4752       len_s[proc] = 0;
4753     } else {
4754       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4755       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4756     }
4757     if (len_s[proc]) {
4758       merge->nsend++;
4759       nrows = 0;
4760       for (i=owners[proc]; i<owners[proc+1]; i++) {
4761         if (ai[i+1] > ai[i]) nrows++;
4762       }
4763       len_si[proc] = 2*(nrows+1);
4764       len         += len_si[proc];
4765     }
4766   }
4767 
4768   /* determine the number and length of messages to receive for ij-structure */
4769   /*-------------------------------------------------------------------------*/
4770   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4771   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4772 
4773   /* post the Irecv of j-structure */
4774   /*-------------------------------*/
4775   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4776   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4777 
4778   /* post the Isend of j-structure */
4779   /*--------------------------------*/
4780   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4781 
4782   for (proc=0, k=0; proc<size; proc++) {
4783     if (!len_s[proc]) continue;
4784     i    = owners[proc];
4785     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4786     k++;
4787   }
4788 
4789   /* receives and sends of j-structure are complete */
4790   /*------------------------------------------------*/
4791   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4792   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4793 
4794   /* send and recv i-structure */
4795   /*---------------------------*/
4796   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4797   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4798 
4799   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4800   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4801   for (proc=0,k=0; proc<size; proc++) {
4802     if (!len_s[proc]) continue;
4803     /* form outgoing message for i-structure:
4804          buf_si[0]:                 nrows to be sent
4805                [1:nrows]:           row index (local to the destination process)
4806                [nrows+1:2*nrows+1]: i-structure index
4807     */
4808     /*-------------------------------------------*/
4809     nrows       = len_si[proc]/2 - 1;
4810     buf_si_i    = buf_si + nrows+1;
4811     buf_si[0]   = nrows;
4812     buf_si_i[0] = 0;
4813     nrows       = 0;
4814     for (i=owners[proc]; i<owners[proc+1]; i++) {
4815       anzi = ai[i+1] - ai[i];
4816       if (anzi) {
4817         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4818         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4819         nrows++;
4820       }
4821     }
4822     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4823     k++;
4824     buf_si += len_si[proc];
4825   }
4826 
4827   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4828   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4829 
4830   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4831   for (i=0; i<merge->nrecv; i++) {
4832     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4833   }
4834 
4835   ierr = PetscFree(len_si);CHKERRQ(ierr);
4836   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4837   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4838   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4839   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4840   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4841   ierr = PetscFree(status);CHKERRQ(ierr);
4842 
4843   /* compute a local seq matrix in each processor */
4844   /*----------------------------------------------*/
4845   /* allocate bi array and free space for accumulating nonzero column info */
4846   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4847   bi[0] = 0;
4848 
4849   /* create and initialize a linked list */
4850   nlnk = N+1;
4851   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4852 
4853   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4854   len  = ai[owners[rank+1]] - ai[owners[rank]];
4855   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4856 
4857   current_space = free_space;
4858 
4859   /* determine symbolic info for each local row */
4860   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4861 
4862   for (k=0; k<merge->nrecv; k++) {
4863     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
4864     nrows       = *buf_ri_k[k];
4865     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of the k-th received i-structure */
4866     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4867   }
4868 
4869   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4870   len  = 0;
4871   for (i=0; i<m; i++) {
4872     bnzi = 0;
4873     /* add local non-zero cols of this proc's seqmat into lnk */
4874     arow  = owners[rank] + i;
4875     anzi  = ai[arow+1] - ai[arow];
4876     aj    = a->j + ai[arow];
4877     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4878     bnzi += nlnk;
4879     /* add received col data into lnk */
4880     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4881       if (i == *nextrow[k]) { /* i-th row */
4882         anzi  = *(nextai[k]+1) - *nextai[k];
4883         aj    = buf_rj[k] + *nextai[k];
4884         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4885         bnzi += nlnk;
4886         nextrow[k]++; nextai[k]++;
4887       }
4888     }
4889     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4890 
4891     /* if free space is not available, make more free space */
4892     if (current_space->local_remaining<bnzi) {
4893       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4894       nspacedouble++;
4895     }
4896     /* copy data into free space, then initialize lnk */
4897     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4898     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4899 
4900     current_space->array           += bnzi;
4901     current_space->local_used      += bnzi;
4902     current_space->local_remaining -= bnzi;
4903 
4904     bi[i+1] = bi[i] + bnzi;
4905   }
4906 
4907   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4908 
4909   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4910   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4911   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4912 
4913   /* create symbolic parallel matrix B_mpi */
4914   /*---------------------------------------*/
4915   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4916   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4917   if (n==PETSC_DECIDE) {
4918     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4919   } else {
4920     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4921   }
4922   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4923   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4924   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4925   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4926   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4927 
4928   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4929   B_mpi->assembled    = PETSC_FALSE;
4930   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4931   merge->bi           = bi;
4932   merge->bj           = bj;
4933   merge->buf_ri       = buf_ri;
4934   merge->buf_rj       = buf_rj;
4935   merge->coi          = NULL;
4936   merge->coj          = NULL;
4937   merge->owners_co    = NULL;
4938 
4939   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4940 
4941   /* attach the supporting struct to B_mpi for reuse */
4942   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4943   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4944   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4945   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4946   *mpimat = B_mpi;
4947 
4948   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4949   PetscFunctionReturn(0);
4950 }
4951 
4952 /*@C
4953       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4954                  matrices from each processor
4955 
4956     Collective on MPI_Comm
4957 
4958    Input Parameters:
4959 +    comm - the communicator the parallel matrix will live on
4960 .    seqmat - the input sequential matrix
4961 .    m - number of local rows (or PETSC_DECIDE)
4962 .    n - number of local columns (or PETSC_DECIDE)
4963 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4964 
4965    Output Parameter:
4966 .    mpimat - the parallel matrix generated
4967 
4968     Level: advanced
4969 
4970    Notes:
4971      The dimensions of the sequential matrix in each processor MUST be the same.
4972      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4973      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
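
     A sketch of the typical calling sequence (names illustrative):
.vb
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
     /* ... change the numerical values of seqmat, keeping the nonzero pattern ... */
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);
.ve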
4974 @*/
4975 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4976 {
4977   PetscErrorCode ierr;
4978   PetscMPIInt    size;
4979 
4980   PetscFunctionBegin;
4981   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4982   if (size == 1) {
4983     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4984     if (scall == MAT_INITIAL_MATRIX) {
4985       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4986     } else {
4987       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4988     }
4989     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4990     PetscFunctionReturn(0);
4991   }
4992   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4993   if (scall == MAT_INITIAL_MATRIX) {
4994     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4995   }
4996   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4997   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4998   PetscFunctionReturn(0);
4999 }
5000 
5001 /*@
5002      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5003           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5004           with MatGetSize().
5005 
5006     Not Collective
5007 
5008    Input Parameters:
5009 +    A - the matrix
5010 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5011 
5012    Output Parameter:
5013 .    A_loc - the local sequential matrix generated
5014 
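   For example, a minimal sketch (the MAT_INITIAL_MATRIX call creates A_loc,
   which the caller destroys when done):

.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     /* ... use A_loc ... */
     MatDestroy(&A_loc);
.ve
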
5015     Level: developer
5016 
5017 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5018 
5019 @*/
5020 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5021 {
5022   PetscErrorCode ierr;
5023   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5024   Mat_SeqAIJ     *mat,*a,*b;
5025   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5026   MatScalar      *aa,*ba,*cam;
5027   PetscScalar    *ca;
5028   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5029   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5030   PetscBool      match;
5031   MPI_Comm       comm;
5032   PetscMPIInt    size;
5033 
5034   PetscFunctionBegin;
5035   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5036   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5037   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5038   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5039   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
5040 
5041   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5042   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5043   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5044   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5045   aa = a->a; ba = b->a;
5046   if (scall == MAT_INITIAL_MATRIX) {
5047     if (size == 1) {
5048       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
           ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); /* balance the event begun above before the early return */
5049       PetscFunctionReturn(0);
5050     }
5051 
5052     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5053     ci[0] = 0;
5054     for (i=0; i<am; i++) {
5055       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5056     }
5057     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5058     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5059     k    = 0;
5060     for (i=0; i<am; i++) {
5061       ncols_o = bi[i+1] - bi[i];
5062       ncols_d = ai[i+1] - ai[i];
5063       /* off-diagonal portion of A: columns left of the diagonal block */
5064       for (jo=0; jo<ncols_o; jo++) {
5065         col = cmap[*bj];
5066         if (col >= cstart) break;
5067         cj[k]   = col; bj++;
5068         ca[k++] = *ba++;
5069       }
5070       /* diagonal portion of A */
5071       for (j=0; j<ncols_d; j++) {
5072         cj[k]   = cstart + *aj++;
5073         ca[k++] = *aa++;
5074       }
5075       /* off-diagonal portion of A: columns right of the diagonal block */
5076       for (j=jo; j<ncols_o; j++) {
5077         cj[k]   = cmap[*bj++];
5078         ca[k++] = *ba++;
5079       }
5080     }
5081     /* put together the new matrix */
5082     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5083     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5084     /* Since these are PETSc arrays, change flags to free them as necessary. */
5085     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5086     mat->free_a  = PETSC_TRUE;
5087     mat->free_ij = PETSC_TRUE;
5088     mat->nonew   = 0;
5089   } else if (scall == MAT_REUSE_MATRIX) {
5090     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5091     ci = mat->i; cj = mat->j; cam = mat->a;
5092     for (i=0; i<am; i++) {
5093       /* off-diagonal portion of A: columns left of the diagonal block */
5094       ncols_o = bi[i+1] - bi[i];
5095       for (jo=0; jo<ncols_o; jo++) {
5096         col = cmap[*bj];
5097         if (col >= cstart) break;
5098         *cam++ = *ba++; bj++;
5099       }
5100       /* diagonal portion of A */
5101       ncols_d = ai[i+1] - ai[i];
5102       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5103       /* off-diagonal portion of A: columns right of the diagonal block */
5104       for (j=jo; j<ncols_o; j++) {
5105         *cam++ = *ba++; bj++;
5106       }
5107     }
5108   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5109   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5110   PetscFunctionReturn(0);
5111 }
5112 
5113 /*@C
5114      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5115 
5116     Not Collective
5117 
5118    Input Parameters:
5119 +    A - the matrix
5120 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5121 -    row, col - index sets of rows and columns to extract (or NULL)
5122 
5123    Output Parameter:
5124 .    A_loc - the local sequential matrix generated
5125 
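    Example usage, a minimal sketch (A is assumed to be an assembled MATMPIAIJ matrix; passing NULL
    for row and col selects all local rows and all nonzero columns; error checking omitted):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
.ve
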
5126     Level: developer
5127 
5128 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5129 
5130 @*/
5131 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5132 {
5133   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5134   PetscErrorCode ierr;
5135   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5136   IS             isrowa,iscola;
5137   Mat            *aloc;
5138   PetscBool      match;
5139 
5140   PetscFunctionBegin;
5141   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5142   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5143   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5144   if (!row) {
5145     start = A->rmap->rstart; end = A->rmap->rend;
5146     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5147   } else {
5148     isrowa = *row;
5149   }
5150   if (!col) {
5151     start = A->cmap->rstart;
5152     cmap  = a->garray;
5153     nzA   = a->A->cmap->n;
5154     nzB   = a->B->cmap->n;
5155     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5156     ncols = 0;
5157     for (i=0; i<nzB; i++) {
5158       if (cmap[i] < start) idx[ncols++] = cmap[i];
5159       else break;
5160     }
5161     imark = i;
5162     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5163     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5164     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5165   } else {
5166     iscola = *col;
5167   }
5168   if (scall != MAT_INITIAL_MATRIX) {
5169     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5170     aloc[0] = *A_loc;
5171   }
5172   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5173   if (!col) { /* attach global id of condensed columns */
5174     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5175   }
5176   *A_loc = aloc[0];
5177   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5178   if (!row) {
5179     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5180   }
5181   if (!col) {
5182     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5183   }
5184   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5185   PetscFunctionReturn(0);
5186 }
5187 
5188 /*@C
5189     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
5190 
5191     Collective on Mat
5192 
5193    Input Parameters:
5194 +    A,B - the matrices in MPIAIJ format
5195 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5196 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5197 
5198    Output Parameter:
5199 +    rowb, colb - index sets of the rows and columns of B that were extracted (set only when non-NULL pointers are passed in)
5200 -    B_seq - the sequential matrix generated
5201 
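    Example usage, a minimal sketch (A and B are assumed to be assembled MPIAIJ matrices with
    compatible layouts; error checking omitted):
.vb
     IS  rowb = NULL,colb = NULL;
     Mat B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     /* the rowb and colb returned above must be passed back in for MAT_REUSE_MATRIX */
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
.ve
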
5202     Level: developer
5203 
5204 @*/
5205 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5206 {
5207   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5208   PetscErrorCode ierr;
5209   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5210   IS             isrowb,iscolb;
5211   Mat            *bseq=NULL;
5212 
5213   PetscFunctionBegin;
5214   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5215     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5216   }
5217   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5218 
5219   if (scall == MAT_INITIAL_MATRIX) {
5220     start = A->cmap->rstart;
5221     cmap  = a->garray;
5222     nzA   = a->A->cmap->n;
5223     nzB   = a->B->cmap->n;
5224     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5225     ncols = 0;
5226     for (i=0; i<nzB; i++) {  /* row < local row index */
5227       if (cmap[i] < start) idx[ncols++] = cmap[i];
5228       else break;
5229     }
5230     imark = i;
5231     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5232     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5233     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5234     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5235   } else {
5236     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5237     isrowb  = *rowb; iscolb = *colb;
5238     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5239     bseq[0] = *B_seq;
5240   }
5241   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5242   *B_seq = bseq[0];
5243   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5244   if (!rowb) {
5245     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5246   } else {
5247     *rowb = isrowb;
5248   }
5249   if (!colb) {
5250     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5251   } else {
5252     *colb = iscolb;
5253   }
5254   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5255   PetscFunctionReturn(0);
5256 }
5257 
5258 #include <petsc/private/vecscatterimpl.h>
5259 /*
5260     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5261     of the OFF-DIAGONAL portion of the local part of A
5262 
5263     Collective on Mat
5264 
5265    Input Parameters:
5266 +    A,B - the matrices in MPIAIJ format
5267 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5268 
5269    Output Parameter:
5270 +    startsj_s - starting points in B's sending j-arrays, saved for use with MAT_REUSE_MATRIX (or NULL)
5271 .    startsj_r - starting points in B's receiving j-arrays, saved for use with MAT_REUSE_MATRIX (or NULL)
5272 .    bufa_ptr - buffer for sending matrix values, saved for use with MAT_REUSE_MATRIX (or NULL)
5273 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5274 
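    Notes: In outline (a description of the logic below, not a specification), three rounds of
    nonblocking MPI_Isend()/MPI_Irecv() pairs exchange the needed rows of B: first the row
    lengths (the i-array), then the column indices (the j-array), and finally the numerical
    values (the a-array). With MAT_REUSE_MATRIX only the value round is repeated, reusing the
    offsets and buffer saved in startsj_s, startsj_r, and bufa_ptr.
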
5275     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5276      for this matrix. This is not desirable.
5277 
5278     Level: developer
5279 
5280 */
5281 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5282 {
5283   VecScatter_MPI_General *gen_to,*gen_from;
5284   PetscErrorCode         ierr;
5285   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5286   Mat_SeqAIJ             *b_oth;
5287   VecScatter             ctx;
5288   MPI_Comm               comm;
5289   PetscMPIInt            *rprocs,*sprocs,tag,rank;
5290   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5291   PetscInt               *rvalues,*svalues,*cols,sbs,rbs;
5292   PetscScalar            *b_otha,*bufa,*bufA,*vals;
5293   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = NULL,*sstarts,*sstartsj,len;
5294   MPI_Request            *rwaits = NULL,*swaits = NULL;
5295   MPI_Status             *sstatus,rstatus;
5296   PetscMPIInt            jj,size;
5297   VecScatterType         type;
5298   PetscBool              mpi1;
5299 
5300   PetscFunctionBegin;
5301   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5302   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5303 
5304   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5305     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5306   }
5307   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5308   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5309 
5310   if (size == 1) {
5311     if (startsj_s) *startsj_s = NULL; /* assign through the pointers so the caller actually receives the NULLs */
5312     if (startsj_r) *startsj_r = NULL;
         if (bufa_ptr)  *bufa_ptr  = NULL;
5313     *B_oth    = NULL;
         ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); /* balance the event begun above before the early return */
5314     PetscFunctionReturn(0);
5315   }
5316 
5317   ctx = a->Mvctx;
5318   ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr);
5319   ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr);
5320   if (!mpi1) {
5321     /* a->Mvctx is not of type MPI1, the only VecScatter type implemented for these Mat-Mat ops,
5322      thus create a->Mvctx_mpi1 */
5323     if (!a->Mvctx_mpi1) {
5324       a->Mvctx_mpi1_flg = PETSC_TRUE;
5325       ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr);
5326     }
5327     ctx = a->Mvctx_mpi1;
5328   }
5329   tag = ((PetscObject)ctx)->tag;
5330 
5331   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5332   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5333   nrecvs   = gen_from->n;
5334   nsends   = gen_to->n;
5335 
5336   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5337   srow    = gen_to->indices;    /* local row index to be sent */
5338   sstarts = gen_to->starts;
5339   sprocs  = gen_to->procs;
5340   sstatus = gen_to->sstatus;
5341   sbs     = gen_to->bs;
5342   rstarts = gen_from->starts;
5343   rprocs  = gen_from->procs;
5344   rbs     = gen_from->bs;
5345 
5346   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5347   if (scall == MAT_INITIAL_MATRIX) {
5348     /* i-array */
5349     /*---------*/
5350     /*  post receives */
5351     ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
5352     for (i=0; i<nrecvs; i++) {
5353       rowlen = rvalues + rstarts[i]*rbs;
5354       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5355       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5356     }
5357 
5358     /* pack the outgoing message */
5359     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5360 
5361     sstartsj[0] = 0;
5362     rstartsj[0] = 0;
5363     len         = 0; /* total length of j or a array to be sent */
5364     k           = 0;
5365     ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
5366     for (i=0; i<nsends; i++) {
5367       rowlen = svalues + sstarts[i]*sbs;
5368       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5369       for (j=0; j<nrows; j++) {
5370         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5371         for (l=0; l<sbs; l++) {
5372           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5373 
5374           rowlen[j*sbs+l] = ncols;
5375 
5376           len += ncols;
5377           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5378         }
5379         k++;
5380       }
5381       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5382 
5383       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5384     }
5385     /* recvs and sends of i-array are completed */
5386     i = nrecvs;
5387     while (i--) {
5388       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5389     }
5390     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5391     ierr = PetscFree(svalues);CHKERRQ(ierr);
5392 
5393     /* allocate buffers for sending j and a arrays */
5394     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5395     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5396 
5397     /* create i-array of B_oth */
5398     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5399 
5400     b_othi[0] = 0;
5401     len       = 0; /* total length of j or a array to be received */
5402     k         = 0;
5403     for (i=0; i<nrecvs; i++) {
5404       rowlen = rvalues + rstarts[i]*rbs;
5405       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5406       for (j=0; j<nrows; j++) {
5407         b_othi[k+1] = b_othi[k] + rowlen[j];
5408         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5409         k++;
5410       }
5411       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5412     }
5413     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5414 
5415     /* allocate space for j and a arrays of B_oth */
5416     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5417     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5418 
5419     /* j-array */
5420     /*---------*/
5421     /*  post receives of j-array */
5422     for (i=0; i<nrecvs; i++) {
5423       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5424       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5425     }
5426 
5427     /* pack the outgoing message j-array */
5428     k = 0;
5429     for (i=0; i<nsends; i++) {
5430       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5431       bufJ  = bufj+sstartsj[i];
5432       for (j=0; j<nrows; j++) {
5433         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5434         for (ll=0; ll<sbs; ll++) {
5435           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5436           for (l=0; l<ncols; l++) {
5437             *bufJ++ = cols[l];
5438           }
5439           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5440         }
5441       }
5442       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5443     }
5444 
5445     /* recvs and sends of j-array are completed */
5446     i = nrecvs;
5447     while (i--) {
5448       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5449     }
5450     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5451   } else if (scall == MAT_REUSE_MATRIX) {
5452     sstartsj = *startsj_s;
5453     rstartsj = *startsj_r;
5454     bufa     = *bufa_ptr;
5455     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5456     b_otha   = b_oth->a;
5457   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5458 
5459   /* a-array */
5460   /*---------*/
5461   /*  post receives of a-array */
5462   for (i=0; i<nrecvs; i++) {
5463     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5464     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5465   }
5466 
5467   /* pack the outgoing message a-array */
5468   k = 0;
5469   for (i=0; i<nsends; i++) {
5470     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5471     bufA  = bufa+sstartsj[i];
5472     for (j=0; j<nrows; j++) {
5473       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5474       for (ll=0; ll<sbs; ll++) {
5475         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5476         for (l=0; l<ncols; l++) {
5477           *bufA++ = vals[l];
5478         }
5479         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5480       }
5481     }
5482     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5483   }
5484   /* recvs and sends of a-array are completed */
5485   i = nrecvs;
5486   while (i--) {
5487     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5488   }
5489   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5490   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5491 
5492   if (scall == MAT_INITIAL_MATRIX) {
5493     /* put together the new matrix */
5494     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5495 
5496     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5497     /* Since these are PETSc arrays, change flags to free them as necessary. */
5498     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5499     b_oth->free_a  = PETSC_TRUE;
5500     b_oth->free_ij = PETSC_TRUE;
5501     b_oth->nonew   = 0;
5502 
5503     ierr = PetscFree(bufj);CHKERRQ(ierr);
5504     if (!startsj_s || !bufa_ptr) {
5505       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5506       ierr = PetscFree(bufa);CHKERRQ(ierr); /* free the allocated send buffer, not the pointer argument */
5507     } else {
5508       *startsj_s = sstartsj;
5509       *startsj_r = rstartsj;
5510       *bufa_ptr  = bufa;
5511     }
5512   }
5513   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5514   PetscFunctionReturn(0);
5515 }
5516 
5517 /*@C
5518   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5519 
5520   Not Collective
5521 
5522   Input Parameter:
5523 . A - The matrix in MPIAIJ format
5524 
5525   Output Parameters:
5526 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5527 . colmap - A map from global column index to local index into lvec
5528 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5529 
5530   Level: developer
5531 
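  Example usage, a minimal sketch (A is assumed to be an assembled MATMPIAIJ matrix; all output
  pointers must be valid since they are checked with PetscValidPointer(); error checking omitted):
.vb
    Vec        lvec;
    VecScatter sct;
#if defined(PETSC_USE_CTABLE)
    PetscTable colmap;
#else
    PetscInt   *colmap;
#endif
    ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&sct);CHKERRQ(ierr);
.ve
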
5532 @*/
5533 #if defined(PETSC_USE_CTABLE)
5534 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5535 #else
5536 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5537 #endif
5538 {
5539   Mat_MPIAIJ *a;
5540 
5541   PetscFunctionBegin;
5542   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5543   PetscValidPointer(lvec, 2);
5544   PetscValidPointer(colmap, 3);
5545   PetscValidPointer(multScatter, 4);
5546   a = (Mat_MPIAIJ*) A->data;
5547   if (lvec) *lvec = a->lvec;
5548   if (colmap) *colmap = a->colmap;
5549   if (multScatter) *multScatter = a->Mvctx;
5550   PetscFunctionReturn(0);
5551 }
5552 
5553 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5554 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5555 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5556 #if defined(PETSC_HAVE_MKL_SPARSE)
5557 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5558 #endif
5559 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5560 #if defined(PETSC_HAVE_ELEMENTAL)
5561 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5562 #endif
5563 #if defined(PETSC_HAVE_HYPRE)
5564 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5565 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5566 #endif
5567 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5568 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5569 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5570 
5571 /*
5572     Computes C = A*B as (B'*A')', since a direct MPIDense*MPIAIJ product is not available
5573 
5574                n                       p                          p
5575         (              )       (              )         (                  )
5576       m (      A       )  *  n (       B      )   =   m (         C        )
5577         (              )       (              )         (                  )
5578 
5579 */
5580 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5581 {
5582   PetscErrorCode ierr;
5583   Mat            At,Bt,Ct;
5584 
5585   PetscFunctionBegin;
5586   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5587   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5588   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5589   ierr = MatDestroy(&At);CHKERRQ(ierr);
5590   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5591   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5592   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5593   PetscFunctionReturn(0);
5594 }
5595 
5596 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5597 {
5598   PetscErrorCode ierr;
5599   PetscInt       m=A->rmap->n,n=B->cmap->n;
5600   Mat            Cmat;
5601 
5602   PetscFunctionBegin;
5603   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5604   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5605   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5606   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5607   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5608   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5609   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5610   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5611 
5612   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5613 
5614   *C = Cmat;
5615   PetscFunctionReturn(0);
5616 }
5617 
5618 /* ----------------------------------------------------------------*/
5619 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5620 {
5621   PetscErrorCode ierr;
5622 
5623   PetscFunctionBegin;
5624   if (scall == MAT_INITIAL_MATRIX) {
5625     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5626     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5627     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5628   }
5629   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5630   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5631   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5632   PetscFunctionReturn(0);
5633 }
5634 
5635 /*MC
5636    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5637 
5638    Options Database Keys:
5639 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5640 
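   Example usage, a minimal sketch of creating an mpiaij matrix directly (error checking omitted):
.vb
    Mat A;
    ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
    ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
.ve
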
5641   Level: beginner
5642 
5643 .seealso: MatCreateAIJ()
5644 M*/
5645 
5646 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5647 {
5648   Mat_MPIAIJ     *b;
5649   PetscErrorCode ierr;
5650   PetscMPIInt    size;
5651 
5652   PetscFunctionBegin;
5653   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5654 
5655   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5656   B->data       = (void*)b;
5657   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5658   B->assembled  = PETSC_FALSE;
5659   B->insertmode = NOT_SET_VALUES;
5660   b->size       = size;
5661 
5662   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5663 
5664   /* build cache for off array entries formed */
5665   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5666 
5667   b->donotstash  = PETSC_FALSE;
5668   b->colmap      = NULL;
5669   b->garray      = NULL;
5670   b->roworiented = PETSC_TRUE;
5671 
5672   /* stuff used for matrix vector multiply */
5673   b->lvec  = NULL;
5674   b->Mvctx = NULL;
5675 
5676   /* stuff for MatGetRow() */
5677   b->rowindices   = NULL;
5678   b->rowvalues    = NULL;
5679   b->getrowactive = PETSC_FALSE;
5680 
5681   /* flexible pointer used in CUSP/CUSPARSE classes */
5682   b->spptr = NULL;
5683 
5684   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5685   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5686   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5687   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5688   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5689   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5690   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5691   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5692   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5693   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
5694 #if defined(PETSC_HAVE_MKL_SPARSE)
5695   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5696 #endif
5697   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5698   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5699 #if defined(PETSC_HAVE_ELEMENTAL)
5700   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5701 #endif
5702 #if defined(PETSC_HAVE_HYPRE)
5703   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5704 #endif
5705   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
5706   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5707   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5708   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5709   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5710 #if defined(PETSC_HAVE_HYPRE)
5711   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5712 #endif
5713   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
5714   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5715   PetscFunctionReturn(0);
5716 }
5717 
5718 /*@C
5719      MatCreateMPIAIJWithSplitArrays - creates an MPIAIJ matrix using arrays that contain the "diagonal"
5720          and "off-diagonal" parts of the matrix in CSR format.
5721 
5722    Collective on MPI_Comm
5723 
5724    Input Parameters:
5725 +  comm - MPI communicator
5726 .  m - number of local rows (cannot be PETSC_DECIDE)
5727 .  n - This value should be the same as the local size used in creating the
5728       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5729       calculated if N is given). For square matrices n is almost always m.
5730 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5731 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5732 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
5733 .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
5734 .   a - matrix values
5735 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
5736 .   oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
5737 -   oa - matrix values
5738 
5739    Output Parameter:
5740 .   mat - the matrix
5741 
5742    Level: advanced
5743 
5744    Notes:
5745        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5746        must free the arrays once the matrix has been destroyed and not before.
5747 
5748        The i and j indices are 0 based
5749 
5750        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5751 
5752        This sets local rows and cannot be used to set off-processor values.
5753 
5754        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5755        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5756        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5757        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5758        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5759        communication if it is known that only local entries will be set.
5760 
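       Example, a minimal sketch for rank 0 of a two-process matrix with two local rows and columns
       (rank 1 would make a matching collective call with its own arrays; the arrays must stay valid
       until the matrix is destroyed; error checking omitted):
.vb
     PetscInt    i[]  = {0,1,2};   /* one diagonal-block entry per local row */
     PetscInt    j[]  = {0,1};     /* local column indices within the diagonal block */
     PetscScalar a[]  = {1.0,2.0};
     PetscInt    oi[] = {0,1,1};   /* one off-diagonal entry, in the first local row */
     PetscInt    oj[] = {3};       /* global column index owned by the other process */
     PetscScalar oa[] = {5.0};
     Mat         mat;
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);
.ve
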
5761 .keywords: matrix, aij, compressed row, sparse, parallel
5762 
5763 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5764           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5765 @*/
5766 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5767 {
5768   PetscErrorCode ierr;
5769   Mat_MPIAIJ     *maij;
5770 
5771   PetscFunctionBegin;
5772   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5773   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5774   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5775   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5776   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5777   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5778   maij = (Mat_MPIAIJ*) (*mat)->data;
5779 
5780   (*mat)->preallocated = PETSC_TRUE;
5781 
5782   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5783   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5784 
5785   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5786   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5787 
5788   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5789   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5790   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5791   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5792 
5793   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5794   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5795   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5796   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5797   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5798   PetscFunctionReturn(0);
5799 }
5800 
5801 /*
5802     Special version for direct calls from Fortran
5803 */
5804 #include <petsc/private/fortranimpl.h>
5805 
5806 /* Change these macros so they can be used in a void function */
5807 #undef CHKERRQ
5808 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5809 #undef SETERRQ2
5810 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5811 #undef SETERRQ3
5812 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5813 #undef SETERRQ
5814 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5815 
5816 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5817 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5818 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5819 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5820 #else
5821 #endif
5822 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5823 {
5824   Mat            mat  = *mmat;
5825   PetscInt       m    = *mm, n = *mn;
5826   InsertMode     addv = *maddv;
5827   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5828   PetscScalar    value;
5829   PetscErrorCode ierr;
5830 
5831   MatCheckPreallocated(mat,1);
5832   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5833 
5834 #if defined(PETSC_USE_DEBUG)
5835   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5836 #endif
5837   {
5838     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5839     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5840     PetscBool roworiented = aij->roworiented;
5841 
5842     /* Some Variables required in the macro */
5843     Mat        A                 = aij->A;
5844     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5845     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5846     MatScalar  *aa               = a->a;
5847     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5848     Mat        B                 = aij->B;
5849     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5850     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5851     MatScalar  *ba               = b->a;
5852 
5853     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5854     PetscInt  nonew = a->nonew;
5855     MatScalar *ap1,*ap2;
5856 
5857     PetscFunctionBegin;
5858     for (i=0; i<m; i++) {
5859       if (im[i] < 0) continue;
5860 #if defined(PETSC_USE_DEBUG)
5861       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5862 #endif
5863       if (im[i] >= rstart && im[i] < rend) {
5864         row      = im[i] - rstart;
5865         lastcol1 = -1;
5866         rp1      = aj + ai[row];
5867         ap1      = aa + ai[row];
5868         rmax1    = aimax[row];
5869         nrow1    = ailen[row];
5870         low1     = 0;
5871         high1    = nrow1;
5872         lastcol2 = -1;
5873         rp2      = bj + bi[row];
5874         ap2      = ba + bi[row];
5875         rmax2    = bimax[row];
5876         nrow2    = bilen[row];
5877         low2     = 0;
5878         high2    = nrow2;
5879 
5880         for (j=0; j<n; j++) {
5881           if (roworiented) value = v[i*n+j];
5882           else value = v[i+j*m];
5883           if (in[j] >= cstart && in[j] < cend) {
5884             col = in[j] - cstart;
5885             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5886             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5887           } else if (in[j] < 0) continue;
5888 #if defined(PETSC_USE_DEBUG)
5889           /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
5890           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5891 #endif
5892           else {
5893             if (mat->was_assembled) {
5894               if (!aij->colmap) {
5895                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5896               }
5897 #if defined(PETSC_USE_CTABLE)
5898               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5899               col--;
5900 #else
5901               col = aij->colmap[in[j]] - 1;
5902 #endif
5903               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5904               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5905                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5906                 col  =  in[j];
5907                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5908                 B     = aij->B;
5909                 b     = (Mat_SeqAIJ*)B->data;
5910                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5911                 rp2   = bj + bi[row];
5912                 ap2   = ba + bi[row];
5913                 rmax2 = bimax[row];
5914                 nrow2 = bilen[row];
5915                 low2  = 0;
5916                 high2 = nrow2;
5917                 bm    = aij->B->rmap->n;
5918                 ba    = b->a;
5919               }
5920             } else col = in[j];
5921             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5922           }
5923         }
5924       } else if (!aij->donotstash) {
5925         if (roworiented) {
5926           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5927         } else {
5928           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5929         }
5930       }
5931     }
5932   }
5933   PetscFunctionReturnVoid();
5934 }
5935