xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision e9ede7d0480700edd5b61d6f5d5ea240462b08e4)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/isimpl.h>
6 #include <petscblaslapack.h>
7 #include <petscsf.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, and MATAIJCRL.  The type also automatically switches over to use
23    inodes when enough exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
29 
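/*
   Example usage (a minimal sketch, not part of the original source; assumes a communicator comm,
   local sizes m and n, and rough per-row estimates of 5 diagonal and 2 off-diagonal nonzeros):

       Mat A;
       ierr = MatCreate(comm,&A);CHKERRQ(ierr);
       ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
       ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
       ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
       ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);

   only the preallocation call matching the communicator size has an effect, which is why calling
   both is harmless.
*/
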
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46 
47 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
48 {
49   PetscErrorCode ierr;
50   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
51 
52   PetscFunctionBegin;
53   if (mat->A) {
54     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
55     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
56   }
57   PetscFunctionReturn(0);
58 }
59 
60 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
61 {
62   PetscErrorCode  ierr;
63   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
64   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
65   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
66   const PetscInt  *ia,*ib;
67   const MatScalar *aa,*bb;
68   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
69   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
70 
71   PetscFunctionBegin;
72   *keptrows = 0;
73   ia        = a->i;
74   ib        = b->i;
75   for (i=0; i<m; i++) {
76     na = ia[i+1] - ia[i];
77     nb = ib[i+1] - ib[i];
78     if (!na && !nb) {
79       cnt++;
80       goto ok1;
81     }
82     aa = a->a + ia[i];
83     for (j=0; j<na; j++) {
84       if (aa[j] != 0.0) goto ok1;
85     }
86     bb = b->a + ib[i];
87     for (j=0; j <nb; j++) {
88       if (bb[j] != 0.0) goto ok1;
89     }
90     cnt++;
91 ok1:;
92   }
93   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
94   if (!n0rows) PetscFunctionReturn(0);
95   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
96   cnt  = 0;
97   for (i=0; i<m; i++) {
98     na = ia[i+1] - ia[i];
99     nb = ib[i+1] - ib[i];
100     if (!na && !nb) continue;
101     aa = a->a + ia[i];
102     for (j=0; j<na;j++) {
103       if (aa[j] != 0.0) {
104         rows[cnt++] = rstart + i;
105         goto ok2;
106       }
107     }
108     bb = b->a + ib[i];
109     for (j=0; j<nb; j++) {
110       if (bb[j] != 0.0) {
111         rows[cnt++] = rstart + i;
112         goto ok2;
113       }
114     }
115 ok2:;
116   }
117   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
118   PetscFunctionReturn(0);
119 }
120 
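/*
   Caller-side sketch (illustration only, not part of the original source): this is the MPIAIJ
   implementation behind MatFindNonzeroRows(); a NULL index set is returned when no process owns
   an all-zero row.  A typical use, assuming a NULL column IS in MatCreateSubMatrix() keeps all
   columns, is to extract only the nonempty rows:

       IS  keptrows;
       Mat Ared;
       ierr = MatFindNonzeroRows(A,&keptrows);CHKERRQ(ierr);
       if (keptrows) {
         ierr = MatCreateSubMatrix(A,keptrows,NULL,MAT_INITIAL_MATRIX,&Ared);CHKERRQ(ierr);
         ierr = ISDestroy(&keptrows);CHKERRQ(ierr);
       }
*/
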
121 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
122 {
123   PetscErrorCode    ierr;
124   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
125   PetscBool         cong;
126 
127   PetscFunctionBegin;
128   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
129   if (Y->assembled && cong) {
130     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
131   } else {
132     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
133   }
134   PetscFunctionReturn(0);
135 }
136 
137 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
138 {
139   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
140   PetscErrorCode ierr;
141   PetscInt       i,rstart,nrows,*rows;
142 
143   PetscFunctionBegin;
144   *zrows = NULL;
145   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
146   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
147   for (i=0; i<nrows; i++) rows[i] += rstart;
148   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
149   PetscFunctionReturn(0);
150 }
151 
152 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
153 {
154   PetscErrorCode ierr;
155   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
156   PetscInt       i,n,*garray = aij->garray;
157   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
158   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
159   PetscReal      *work;
160 
161   PetscFunctionBegin;
162   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
163   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
164   if (type == NORM_2) {
165     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
166       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
167     }
168     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
169       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
170     }
171   } else if (type == NORM_1) {
172     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
173       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
174     }
175     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
176       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
177     }
178   } else if (type == NORM_INFINITY) {
179     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
180       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
181     }
182     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
183       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
184     }
185 
186   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
187   if (type == NORM_INFINITY) {
188     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
189   } else {
190     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
191   }
192   ierr = PetscFree(work);CHKERRQ(ierr);
193   if (type == NORM_2) {
194     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
195   }
196   PetscFunctionReturn(0);
197 }
198 
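/*
   Caller-side sketch (illustration only): the norms array must have one entry per global column,
   and because of the Allreduce above every process ends up with the complete result.

       PetscInt  N;
       PetscReal *norms;
       ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
       ierr = PetscMalloc1(N,&norms);CHKERRQ(ierr);
       ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
       ierr = PetscFree(norms);CHKERRQ(ierr);
*/
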
199 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
200 {
201   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
202   IS              sis,gis;
203   PetscErrorCode  ierr;
204   const PetscInt  *isis,*igis;
205   PetscInt        n,*iis,nsis,ngis,rstart,i;
206 
207   PetscFunctionBegin;
208   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
209   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
210   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
211   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
212   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
213   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
214 
215   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
216   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
217   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
218   n    = ngis + nsis;
219   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
220   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
221   for (i=0; i<n; i++) iis[i] += rstart;
222   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
223 
224   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
225   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
226   ierr = ISDestroy(&sis);CHKERRQ(ierr);
227   ierr = ISDestroy(&gis);CHKERRQ(ierr);
228   PetscFunctionReturn(0);
229 }
230 
231 /*
232     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
233     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
234 
235     Only for square matrices
236 
237     Used by a preconditioner, hence PETSC_EXTERN
238 */
239 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
240 {
241   PetscMPIInt    rank,size;
242   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
243   PetscErrorCode ierr;
244   Mat            mat;
245   Mat_SeqAIJ     *gmata;
246   PetscMPIInt    tag;
247   MPI_Status     status;
248   PetscBool      aij;
249   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
250 
251   PetscFunctionBegin;
252   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
253   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
254   if (!rank) {
255     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
256     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
257   }
258   if (reuse == MAT_INITIAL_MATRIX) {
259     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
260     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
261     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
262     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
263     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
264     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
265     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
266     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
267     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
268 
269     rowners[0] = 0;
270     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
271     rstart = rowners[rank];
272     rend   = rowners[rank+1];
273     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
274     if (!rank) {
275       gmata = (Mat_SeqAIJ*) gmat->data;
276       /* send row lengths to all processors */
277       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
278       for (i=1; i<size; i++) {
279         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
280       }
281       /* determine the number of diagonal and off-diagonal entries in each row */
282       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
283       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
284       jj   = 0;
285       for (i=0; i<m; i++) {
286         for (j=0; j<dlens[i]; j++) {
287           if (gmata->j[jj] < rstart) ld[i]++;
288           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
289           jj++;
290         }
291       }
292       /* send column indices to other processes */
293       for (i=1; i<size; i++) {
294         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
295         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
296         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
297       }
298 
299       /* send numerical values to other processes */
300       for (i=1; i<size; i++) {
301         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
302         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
303       }
304       gmataa = gmata->a;
305       gmataj = gmata->j;
306 
307     } else {
308       /* receive row lengths */
309       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
310       /* receive column indices */
311       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
312       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
313       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
314       /* determine the number of diagonal and off-diagonal entries in each row */
315       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
316       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
317       jj   = 0;
318       for (i=0; i<m; i++) {
319         for (j=0; j<dlens[i]; j++) {
320           if (gmataj[jj] < rstart) ld[i]++;
321           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
322           jj++;
323         }
324       }
325       /* receive numerical values */
326       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
327       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
328     }
329     /* set preallocation */
330     for (i=0; i<m; i++) {
331       dlens[i] -= olens[i];
332     }
333     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
334     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
335 
336     for (i=0; i<m; i++) {
337       dlens[i] += olens[i];
338     }
339     cnt = 0;
340     for (i=0; i<m; i++) {
341       row  = rstart + i;
342       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
343       cnt += dlens[i];
344     }
345     if (rank) {
346       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
347     }
348     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
349     ierr = PetscFree(rowners);CHKERRQ(ierr);
350 
351     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
352 
353     *inmat = mat;
354   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
355     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
356     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
357     mat  = *inmat;
358     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
359     if (!rank) {
360       /* send numerical values to other processes */
361       gmata  = (Mat_SeqAIJ*) gmat->data;
362       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
363       gmataa = gmata->a;
364       for (i=1; i<size; i++) {
365         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
366         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
367       }
368       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
369     } else {
370       /* receive numerical values from process 0 */
371       nz   = Ad->nz + Ao->nz;
372       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
373       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
374     }
375     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
376     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
377     ad = Ad->a;
378     ao = Ao->a;
379     if (mat->rmap->n) {
380       i  = 0;
381       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
382       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
383     }
384     for (i=1; i<mat->rmap->n; i++) {
385       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
386       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
387     }
388     i--;
389     if (mat->rmap->n) {
390       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
391     }
392     if (rank) {
393       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
394     }
395   }
396   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
397   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
398   PetscFunctionReturn(0);
399 }
400 
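/*
   Caller-side sketch (illustration only, under these assumptions): gmat is a valid MATSEQAIJ
   handle on every process but only rank 0's entries are used, each process passes its local row
   count m, and the local counts sum to the global size of gmat.

       Mat dmat;
       ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
*/
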
401 /*
402   Local utility routine that creates a mapping from the global column
403 number to the local number in the off-diagonal part of the local
404 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable, at
405 a slightly higher hash-table cost; without it, it is not scalable (each process
406 holds an order-N integer array), but access is fast.
407 */
408 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
409 {
410   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
411   PetscErrorCode ierr;
412   PetscInt       n = aij->B->cmap->n,i;
413 
414   PetscFunctionBegin;
415   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
416 #if defined(PETSC_USE_CTABLE)
417   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
418   for (i=0; i<n; i++) {
419     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
420   }
421 #else
422   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
423   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
424   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
425 #endif
426   PetscFunctionReturn(0);
427 }
428 
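/*
   Lookup sketch (mirrors the uses in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below):
   entries are stored shifted by one so that "not found" is distinguishable from local index 0;
   gcol and lcol are illustrative names for a global column and its local off-diagonal index.

   #if defined(PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
       lcol--;
   #else
       lcol = aij->colmap[gcol] - 1;
   #endif

   a result of -1 means gcol is not among this process's off-diagonal columns.
*/
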
429 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
430 { \
431     if (col <= lastcol1)  low1 = 0;     \
432     else                 high1 = nrow1; \
433     lastcol1 = col;\
434     while (high1-low1 > 5) { \
435       t = (low1+high1)/2; \
436       if (rp1[t] > col) high1 = t; \
437       else              low1  = t; \
438     } \
439       for (_i=low1; _i<high1; _i++) { \
440         if (rp1[_i] > col) break; \
441         if (rp1[_i] == col) { \
442           if (addv == ADD_VALUES) ap1[_i] += value;   \
443           else                    ap1[_i] = value; \
444           goto a_noinsert; \
445         } \
446       }  \
447       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
448       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
449       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
450       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
451       N = nrow1++ - 1; a->nz++; high1++; \
452       /* shift up all the later entries in this row */ \
453       for (ii=N; ii>=_i; ii--) { \
454         rp1[ii+1] = rp1[ii]; \
455         ap1[ii+1] = ap1[ii]; \
456       } \
457       rp1[_i] = col;  \
458       ap1[_i] = value;  \
459       A->nonzerostate++;\
460       a_noinsert: ; \
461       ailen[row] = nrow1; \
462 }
463 
464 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
465   { \
466     if (col <= lastcol2) low2 = 0;                        \
467     else high2 = nrow2;                                   \
468     lastcol2 = col;                                       \
469     while (high2-low2 > 5) {                              \
470       t = (low2+high2)/2;                                 \
471       if (rp2[t] > col) high2 = t;                        \
472       else             low2  = t;                         \
473     }                                                     \
474     for (_i=low2; _i<high2; _i++) {                       \
475       if (rp2[_i] > col) break;                           \
476       if (rp2[_i] == col) {                               \
477         if (addv == ADD_VALUES) ap2[_i] += value;         \
478         else                    ap2[_i] = value;          \
479         goto b_noinsert;                                  \
480       }                                                   \
481     }                                                     \
482     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
483     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
484     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
485     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
486     N = nrow2++ - 1; b->nz++; high2++;                    \
487     /* shift up all the later entries in this row */      \
488     for (ii=N; ii>=_i; ii--) {                            \
489       rp2[ii+1] = rp2[ii];                                \
490       ap2[ii+1] = ap2[ii];                                \
491     }                                                     \
492     rp2[_i] = col;                                        \
493     ap2[_i] = value;                                      \
494     B->nonzerostate++;                                    \
495     b_noinsert: ;                                         \
496     bilen[row] = nrow2;                                   \
497   }
498 
499 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
500 {
501   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
502   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
503   PetscErrorCode ierr;
504   PetscInt       l,*garray = mat->garray,diag;
505 
506   PetscFunctionBegin;
507   /* code only works for square matrices A */
508 
509   /* find size of row to the left of the diagonal part */
510   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
511   row  = row - diag;
512   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
513     if (garray[b->j[b->i[row]+l]] > diag) break;
514   }
515   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
516 
517   /* diagonal part */
518   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
519 
520   /* right of diagonal part */
521   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
522   PetscFunctionReturn(0);
523 }
524 
525 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
526 {
527   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
528   PetscScalar    value;
529   PetscErrorCode ierr;
530   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
531   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
532   PetscBool      roworiented = aij->roworiented;
533 
534   /* Some variables required by the macros below */
535   Mat        A                 = aij->A;
536   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
537   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
538   MatScalar  *aa               = a->a;
539   PetscBool  ignorezeroentries = a->ignorezeroentries;
540   Mat        B                 = aij->B;
541   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
542   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
543   MatScalar  *ba               = b->a;
544 
545   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
546   PetscInt  nonew;
547   MatScalar *ap1,*ap2;
548 
549   PetscFunctionBegin;
550   for (i=0; i<m; i++) {
551     if (im[i] < 0) continue;
552 #if defined(PETSC_USE_DEBUG)
553     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
554 #endif
555     if (im[i] >= rstart && im[i] < rend) {
556       row      = im[i] - rstart;
557       lastcol1 = -1;
558       rp1      = aj + ai[row];
559       ap1      = aa + ai[row];
560       rmax1    = aimax[row];
561       nrow1    = ailen[row];
562       low1     = 0;
563       high1    = nrow1;
564       lastcol2 = -1;
565       rp2      = bj + bi[row];
566       ap2      = ba + bi[row];
567       rmax2    = bimax[row];
568       nrow2    = bilen[row];
569       low2     = 0;
570       high2    = nrow2;
571 
572       for (j=0; j<n; j++) {
573         if (roworiented) value = v[i*n+j];
574         else             value = v[i+j*m];
575         if (in[j] >= cstart && in[j] < cend) {
576           col   = in[j] - cstart;
577           nonew = a->nonew;
578           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
579           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
580         } else if (in[j] < 0) continue;
581 #if defined(PETSC_USE_DEBUG)
582         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
583 #endif
584         else {
585           if (mat->was_assembled) {
586             if (!aij->colmap) {
587               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
588             }
589 #if defined(PETSC_USE_CTABLE)
590             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
591             col--;
592 #else
593             col = aij->colmap[in[j]] - 1;
594 #endif
595             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
596               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
597               col  =  in[j];
598               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
599               B     = aij->B;
600               b     = (Mat_SeqAIJ*)B->data;
601               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
602               rp2   = bj + bi[row];
603               ap2   = ba + bi[row];
604               rmax2 = bimax[row];
605               nrow2 = bilen[row];
606               low2  = 0;
607               high2 = nrow2;
608               bm    = aij->B->rmap->n;
609               ba    = b->a;
610             } else if (col < 0) {
611               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
612                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
613               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
614             }
615           } else col = in[j];
616           nonew = b->nonew;
617           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
618         }
619       }
620     } else {
621       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
622       if (!aij->donotstash) {
623         mat->assembled = PETSC_FALSE;
624         if (roworiented) {
625           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
626         } else {
627           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
628         }
629       }
630     }
631   }
632   PetscFunctionReturn(0);
633 }
634 
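/*
   Caller-side sketch (illustration only; row and col below are placeholder global indices):
   values destined for rows owned by another process are stashed above rather than applied
   immediately, so every sequence of MatSetValues() calls must be closed by the assembly pair
   before the matrix is used.

       PetscInt    row = 0,col = 0;
       PetscScalar v   = 1.0;
       ierr = MatSetValues(A,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/
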
635 /*
636     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
637     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
638     No off-process parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
639 */
640 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
641 {
642   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
643   Mat            A           = aij->A; /* diagonal part of the matrix */
644   Mat            B           = aij->B; /* offdiagonal part of the matrix */
645   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
646   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
647   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
648   PetscInt       *ailen      = a->ilen,*aj = a->j;
649   PetscInt       *bilen      = b->ilen,*bj = b->j;
650   PetscInt       am          = aij->A->rmap->n,j;
651   PetscInt       diag_so_far = 0,dnz;
652   PetscInt       offd_so_far = 0,onz;
653 
654   PetscFunctionBegin;
655   /* Iterate over all rows of the matrix */
656   for (j=0; j<am; j++) {
657     dnz = onz = 0;
658     /*  Iterate over all non-zero columns of the current row */
659     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
660       /* If column is in the diagonal */
661       if (mat_j[col] >= cstart && mat_j[col] < cend) {
662         aj[diag_so_far++] = mat_j[col] - cstart;
663         dnz++;
664       } else { /* off-diagonal entries */
665         bj[offd_so_far++] = mat_j[col];
666         onz++;
667       }
668     }
669     ailen[j] = dnz;
670     bilen[j] = onz;
671   }
672   PetscFunctionReturn(0);
673 }
674 
675 /*
676     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
677     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
678     No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
679 */
680 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
681 {
682   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
683   Mat            A      = aij->A; /* diagonal part of the matrix */
684   Mat            B      = aij->B; /* offdiagonal part of the matrix */
685   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
686   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
687   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
688   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
689   PetscInt       *ailen = a->ilen,*aj = a->j;
690   PetscInt       *bilen = b->ilen,*bj = b->j;
691   PetscInt       am     = aij->A->rmap->n,j;
692   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i;
693   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
694   PetscScalar    *aa = a->a,*ba = b->a;
695 
696   PetscFunctionBegin;
697   /* Iterate over all rows of the matrix */
698   for (j=0; j<am; j++) {
699     dnz_row = onz_row = 0;
700     rowstart_offd = full_offd_i[j];
701     rowstart_diag = full_diag_i[j];
702     /*  Iterate over all non-zero columns of the current row */
703     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
704       /* If column is in the diagonal */
705       if (mat_j[col] >= cstart && mat_j[col] < cend) {
706         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
707         aa[rowstart_diag+dnz_row] = mat_a[col];
708         dnz_row++;
709       } else { /* off-diagonal entries */
710         bj[rowstart_offd+onz_row] = mat_j[col];
711         ba[rowstart_offd+onz_row] = mat_a[col];
712         onz_row++;
713       }
714     }
715     ailen[j] = dnz_row;
716     bilen[j] = onz_row;
717   }
718   PetscFunctionReturn(0);
719 }
720 
721 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
722 {
723   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
724   PetscErrorCode ierr;
725   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
726   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
727 
728   PetscFunctionBegin;
729   for (i=0; i<m; i++) {
730     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
731     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
732     if (idxm[i] >= rstart && idxm[i] < rend) {
733       row = idxm[i] - rstart;
734       for (j=0; j<n; j++) {
735         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
736         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
737         if (idxn[j] >= cstart && idxn[j] < cend) {
738           col  = idxn[j] - cstart;
739           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
740         } else {
741           if (!aij->colmap) {
742             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
743           }
744 #if defined(PETSC_USE_CTABLE)
745           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
746           col--;
747 #else
748           col = aij->colmap[idxn[j]] - 1;
749 #endif
750           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
751           else {
752             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
753           }
754         }
755       }
756     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
757   }
758   PetscFunctionReturn(0);
759 }
760 
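/*
   Caller-side sketch (illustration only): as the error branch above indicates, only rows owned by
   the calling process may be queried, so callers normally stay within their ownership range.

       PetscInt    rstart,rend,row,col = 0;
       PetscScalar val;
       ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
       row  = rstart;
       ierr = MatGetValues(A,1,&row,1,&col,&val);CHKERRQ(ierr);
*/
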
761 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
762 
763 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
764 {
765   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
766   PetscErrorCode ierr;
767   PetscInt       nstash,reallocs;
768 
769   PetscFunctionBegin;
770   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
771 
772   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
773   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
774   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
775   PetscFunctionReturn(0);
776 }
777 
778 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
779 {
780   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
781   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
782   PetscErrorCode ierr;
783   PetscMPIInt    n;
784   PetscInt       i,j,rstart,ncols,flg;
785   PetscInt       *row,*col;
786   PetscBool      other_disassembled;
787   PetscScalar    *val;
788 
789   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
790 
791   PetscFunctionBegin;
792   if (!aij->donotstash && !mat->nooffprocentries) {
793     while (1) {
794       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
795       if (!flg) break;
796 
797       for (i=0; i<n; ) {
798         /* Now identify the consecutive vals belonging to the same row */
799         for (j=i,rstart=row[j]; j<n; j++) {
800           if (row[j] != rstart) break;
801         }
802         if (j < n) ncols = j-i;
803         else       ncols = n-i;
804         /* Now assemble all these values with a single function call */
805         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
806 
807         i = j;
808       }
809     }
810     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
811   }
812   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
813   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
814 
815   /* determine if any processor has disassembled, if so we must
816      also disassemble ourselves, in order that we may reassemble. */
817   /*
818      if nonzero structure of submatrix B cannot change then we know that
819      no processor disassembled thus we can skip this stuff
820   */
821   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
822     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
823     if (mat->was_assembled && !other_disassembled) {
824       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
825     }
826   }
827   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
828     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
829   }
830   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
831   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
832   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
833 
834   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
835 
836   aij->rowvalues = 0;
837 
838   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
839   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
840 
841   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
842   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
843     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
844     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
845   }
846   PetscFunctionReturn(0);
847 }
848 
849 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
850 {
851   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
852   PetscErrorCode ierr;
853 
854   PetscFunctionBegin;
855   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
856   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
857   PetscFunctionReturn(0);
858 }
859 
860 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
861 {
862   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
863   PetscInt      *lrows;
864   PetscInt       r, len;
865   PetscBool      cong;
866   PetscErrorCode ierr;
867 
868   PetscFunctionBegin;
869   /* get locally owned rows */
870   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
871   /* fix right hand side if needed */
872   if (x && b) {
873     const PetscScalar *xx;
874     PetscScalar       *bb;
875 
876     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
877     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
878     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
879     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
880     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
881   }
882   /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
883   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
884   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
885   if ((diag != 0.0) && cong) {
886     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
887   } else if (diag != 0.0) {
888     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
889     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
890     for (r = 0; r < len; ++r) {
891       const PetscInt row = lrows[r] + A->rmap->rstart;
892       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
893     }
894     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
895     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
896   } else {
897     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
898   }
899   ierr = PetscFree(lrows);CHKERRQ(ierr);
900 
901   /* only change matrix nonzero state if pattern was allowed to be changed */
902   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
903     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
904     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
905   }
906   PetscFunctionReturn(0);
907 }
908 
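/*
   Caller-side sketch (illustration only): zero the listed global rows, place diag on their
   diagonal entries, and (when x and b are given) rewrite the corresponding entries of b so the
   zeroed rows keep the solution values supplied in x.

       ierr = MatZeroRows(A,nrows,rows,1.0,x,b);CHKERRQ(ierr);
*/
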
909 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
910 {
911   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
912   PetscErrorCode    ierr;
913   PetscMPIInt       n = A->rmap->n;
914   PetscInt          i,j,r,m,p = 0,len = 0;
915   PetscInt          *lrows,*owners = A->rmap->range;
916   PetscSFNode       *rrows;
917   PetscSF           sf;
918   const PetscScalar *xx;
919   PetscScalar       *bb,*mask;
920   Vec               xmask,lmask;
921   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
922   const PetscInt    *aj, *ii,*ridx;
923   PetscScalar       *aa;
924 
925   PetscFunctionBegin;
926   /* Create SF where leaves are input rows and roots are owned rows */
927   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
928   for (r = 0; r < n; ++r) lrows[r] = -1;
929   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
930   for (r = 0; r < N; ++r) {
931     const PetscInt idx   = rows[r];
932     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
933     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
934       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
935     }
936     rrows[r].rank  = p;
937     rrows[r].index = rows[r] - owners[p];
938   }
939   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
940   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
941   /* Collect flags for rows to be zeroed */
942   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
943   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
944   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
945   /* Compress and put in row numbers */
946   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
947   /* zero diagonal part of matrix */
948   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
949   /* handle off diagonal part of matrix */
950   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
951   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
952   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
953   for (i=0; i<len; i++) bb[lrows[i]] = 1;
954   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
955   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
956   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
957   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
958   if (x) {
959     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
960     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
961     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
962     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
963   }
964   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
965   /* remove zeroed rows of off diagonal matrix */
966   ii = aij->i;
967   for (i=0; i<len; i++) {
968     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
969   }
970   /* loop over all elements of the off-process part of the matrix, zeroing the removed columns */
971   if (aij->compressedrow.use) {
972     m    = aij->compressedrow.nrows;
973     ii   = aij->compressedrow.i;
974     ridx = aij->compressedrow.rindex;
975     for (i=0; i<m; i++) {
976       n  = ii[i+1] - ii[i];
977       aj = aij->j + ii[i];
978       aa = aij->a + ii[i];
979 
980       for (j=0; j<n; j++) {
981         if (PetscAbsScalar(mask[*aj])) {
982           if (b) bb[*ridx] -= *aa*xx[*aj];
983           *aa = 0.0;
984         }
985         aa++;
986         aj++;
987       }
988       ridx++;
989     }
990   } else { /* do not use compressed row format */
991     m = l->B->rmap->n;
992     for (i=0; i<m; i++) {
993       n  = ii[i+1] - ii[i];
994       aj = aij->j + ii[i];
995       aa = aij->a + ii[i];
996       for (j=0; j<n; j++) {
997         if (PetscAbsScalar(mask[*aj])) {
998           if (b) bb[i] -= *aa*xx[*aj];
999           *aa = 0.0;
1000         }
1001         aa++;
1002         aj++;
1003       }
1004     }
1005   }
1006   if (x) {
1007     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1008     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1009   }
1010   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1011   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1012   ierr = PetscFree(lrows);CHKERRQ(ierr);
1013 
1014   /* only change matrix nonzero state if pattern was allowed to be changed */
1015   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1016     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1017     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1018   }
1019   PetscFunctionReturn(0);
1020 }
1021 
1022 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1023 {
1024   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1025   PetscErrorCode ierr;
1026   PetscInt       nt;
1027   VecScatter     Mvctx = a->Mvctx;
1028 
1029   PetscFunctionBegin;
1030   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1031   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1032 
1033   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1034   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1035   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1036   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1037   PetscFunctionReturn(0);
1038 }
1039 
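/*
   In matrix terms the routine above computes, on each process,

       y = A_diag * x_local + B_offdiag * x_ghost

   where x_ghost (stored in a->lvec) is gathered by the Mvctx scatter; the scatter is started
   before the diagonal-block multiply so communication overlaps with local computation.
*/
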
1040 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1041 {
1042   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1043   PetscErrorCode ierr;
1044 
1045   PetscFunctionBegin;
1046   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1047   PetscFunctionReturn(0);
1048 }
1049 
1050 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1051 {
1052   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1053   PetscErrorCode ierr;
1054   VecScatter     Mvctx = a->Mvctx;
1055 
1056   PetscFunctionBegin;
1057   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1058   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1059   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1060   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1061   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1062   PetscFunctionReturn(0);
1063 }
1064 
1065 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1066 {
1067   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1068   PetscErrorCode ierr;
1069   PetscBool      merged;
1070 
1071   PetscFunctionBegin;
1072   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1073   /* do nondiagonal part */
1074   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1075   if (!merged) {
1076     /* send it on its way */
1077     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1078     /* do local part */
1079     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1080     /* receive remote parts: note this assumes the values are not actually */
1081     /* added in yy until the next line, */
1082     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1083   } else {
1084     /* do local part */
1085     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1086     /* send it on its way */
1087     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1088     /* values actually were received in the Begin() but we need to call this nop */
1089     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1090   }
1091   PetscFunctionReturn(0);
1092 }
1093 
1094 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1095 {
1096   MPI_Comm       comm;
1097   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1098   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1099   IS             Me,Notme;
1100   PetscErrorCode ierr;
1101   PetscInt       M,N,first,last,*notme,i;
1102   PetscMPIInt    size;
1103 
1104   PetscFunctionBegin;
1105   /* Easy test: symmetric diagonal block */
1106   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1107   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1108   if (!*f) PetscFunctionReturn(0);
1109   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1110   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1111   if (size == 1) PetscFunctionReturn(0);
1112 
1113   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1114   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1115   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1116   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1117   for (i=0; i<first; i++) notme[i] = i;
1118   for (i=last; i<M; i++) notme[i-last+first] = i;
1119   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1120   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1121   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1122   Aoff = Aoffs[0];
1123   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1124   Boff = Boffs[0];
1125   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1126   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1127   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1128   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1129   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1130   ierr = PetscFree(notme);CHKERRQ(ierr);
1131   PetscFunctionReturn(0);
1132 }
1133 
1134 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1135 {
1136   PetscErrorCode ierr;
1137 
1138   PetscFunctionBegin;
1139   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1140   PetscFunctionReturn(0);
1141 }
1142 
1143 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1144 {
1145   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1146   PetscErrorCode ierr;
1147 
1148   PetscFunctionBegin;
1149   /* do nondiagonal part */
1150   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1151   /* send it on its way */
1152   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1153   /* do local part */
1154   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1155   /* receive remote parts */
1156   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1157   PetscFunctionReturn(0);
1158 }
1159 
1160 /*
1161   This only works correctly for square matrices where the subblock A->A is the
1162    diagonal block
1163 */
1164 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1165 {
1166   PetscErrorCode ierr;
1167   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1168 
1169   PetscFunctionBegin;
1170   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1171   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1172   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1177 {
1178   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1179   PetscErrorCode ierr;
1180 
1181   PetscFunctionBegin;
1182   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1183   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1184   PetscFunctionReturn(0);
1185 }
1186 
1187 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1188 {
1189   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1190   PetscErrorCode ierr;
1191 
1192   PetscFunctionBegin;
1193 #if defined(PETSC_USE_LOG)
1194   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1195 #endif
1196   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1197   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1198   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1199   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1200 #if defined(PETSC_USE_CTABLE)
1201   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1202 #else
1203   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1204 #endif
1205   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1206   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1207   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1208   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1209   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1210   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1211   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1212 
1213   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1214   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1215   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1216   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1217   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1218   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1219   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1220   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1221   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1222 #if defined(PETSC_HAVE_ELEMENTAL)
1223   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1224 #endif
1225 #if defined(PETSC_HAVE_HYPRE)
1226   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1227   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1228 #endif
1229   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1230   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
1231   PetscFunctionReturn(0);
1232 }
1233 
1234 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1235 {
1236   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1237   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1238   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1239   PetscErrorCode ierr;
1240   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1241   int            fd;
1242   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1243   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1244   PetscScalar    *column_values;
1245   PetscInt       message_count,flowcontrolcount;
1246   FILE           *file;
1247 
1248   PetscFunctionBegin;
1249   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1250   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1251   nz   = A->nz + B->nz;
1252   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1253   if (!rank) {
1254     header[0] = MAT_FILE_CLASSID;
1255     header[1] = mat->rmap->N;
1256     header[2] = mat->cmap->N;
1257 
1258     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1259     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1260     /* get largest number of rows any processor has */
1261     rlen  = mat->rmap->n;
1262     range = mat->rmap->range;
1263     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1264   } else {
1265     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1266     rlen = mat->rmap->n;
1267   }
1268 
1269   /* load up the local row counts */
1270   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1271   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1272 
1273   /* store the row lengths to the file */
1274   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1275   if (!rank) {
1276     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1277     for (i=1; i<size; i++) {
1278       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1279       rlen = range[i+1] - range[i];
1280       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1281       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1282     }
1283     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1284   } else {
1285     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1286     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1287     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1288   }
1289   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1290 
1291   /* load up the local column indices */
1292   nzmax = nz; /* process 0 needs as much buffer space as the process with the most nonzeros; computed next */
1293   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1294   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1295   cnt   = 0;
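  /* Each local row is emitted in ascending global column order (assuming the usual sorted AIJ
     storage): off-diagonal (B) entries whose global column (garray[]) lies before the diagonal
     block come first, then the diagonal (A) entries shifted by cstart to global numbering, then
     the remaining B entries. */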
1296   for (i=0; i<mat->rmap->n; i++) {
1297     for (j=B->i[i]; j<B->i[i+1]; j++) {
1298       if ((col = garray[B->j[j]]) > cstart) break;
1299       column_indices[cnt++] = col;
1300     }
1301     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1302     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1303   }
1304   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1305 
1306   /* store the column indices to the file */
1307   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1308   if (!rank) {
1309     MPI_Status status;
1310     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1311     for (i=1; i<size; i++) {
1312       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1313       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1314       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1315       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1316       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1317     }
1318     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1319   } else {
1320     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1321     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1322     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1323     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1324   }
1325   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1326 
1327   /* load up the local column values */
1328   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1329   cnt  = 0;
1330   for (i=0; i<mat->rmap->n; i++) {
1331     for (j=B->i[i]; j<B->i[i+1]; j++) {
1332       if (garray[B->j[j]] > cstart) break;
1333       column_values[cnt++] = B->a[j];
1334     }
1335     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1336     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1337   }
1338   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1339 
1340   /* store the column values to the file */
1341   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1342   if (!rank) {
1343     MPI_Status status;
1344     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1345     for (i=1; i<size; i++) {
1346       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1347       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1348       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1349       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1350       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1351     }
1352     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1353   } else {
1354     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1355     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1356     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1357     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1358   }
1359   ierr = PetscFree(column_values);CHKERRQ(ierr);
1360 
1361   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1362   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1363   PetscFunctionReturn(0);
1364 }
1365 
1366 #include <petscdraw.h>
1367 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1368 {
1369   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1370   PetscErrorCode    ierr;
1371   PetscMPIInt       rank = aij->rank,size = aij->size;
1372   PetscBool         isdraw,iascii,isbinary;
1373   PetscViewer       sviewer;
1374   PetscViewerFormat format;
1375 
1376   PetscFunctionBegin;
1377   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1378   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1379   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1380   if (iascii) {
1381     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1382     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1383       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1384       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1385       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1386       for (i=0; i<(PetscInt)size; i++) {
1387         nmax = PetscMax(nmax,nz[i]);
1388         nmin = PetscMin(nmin,nz[i]);
1389         navg += nz[i];
1390       }
1391       ierr = PetscFree(nz);CHKERRQ(ierr);
1392       navg = navg/size;
1393       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1394       PetscFunctionReturn(0);
1395     }
1396     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1397     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1398       MatInfo   info;
1399       PetscInt  *inodes=NULL;
1400 
1401       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1402       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1403       ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr);
1404       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1405       if (!inodes) {
1406         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1407                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1408       } else {
1409         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1410                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1411       }
1412       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1413       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1414       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1415       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1416       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1417       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1418       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1419       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1420       PetscFunctionReturn(0);
1421     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1422       PetscInt inodecount,inodelimit,*inodes;
1423       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1424       if (inodes) {
1425         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1426       } else {
1427         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1428       }
1429       PetscFunctionReturn(0);
1430     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1431       PetscFunctionReturn(0);
1432     }
1433   } else if (isbinary) {
1434     if (size == 1) {
1435       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1436       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1437     } else {
1438       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1439     }
1440     PetscFunctionReturn(0);
1441   } else if (isdraw) {
1442     PetscDraw draw;
1443     PetscBool isnull;
1444     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1445     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1446     if (isnull) PetscFunctionReturn(0);
1447   }
1448 
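  /* The remaining viewers (ASCII matrix entries, draw, socket) are handled by gathering a copy
     of the entire matrix onto process 0: every other process contributes 0 rows to the temporary
     MATMPIAIJ below, so memory use on process 0 scales with the global number of nonzeros. */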
1449   {
1450     /* assemble the entire matrix onto first processor. */
1451     Mat        A;
1452     Mat_SeqAIJ *Aloc;
1453     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1454     MatScalar  *a;
1455 
1456     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1457     if (!rank) {
1458       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1459     } else {
1460       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1461     }
1462     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1463     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1464     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1465     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1466     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1467 
1468     /* copy over the A part */
1469     Aloc = (Mat_SeqAIJ*)aij->A->data;
1470     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1471     row  = mat->rmap->rstart;
1472     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1473     for (i=0; i<m; i++) {
1474       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1475       row++;
1476       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1477     }
1478     aj = Aloc->j;
1479     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1480 
1481     /* copy over the B part */
1482     Aloc = (Mat_SeqAIJ*)aij->B->data;
1483     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1484     row  = mat->rmap->rstart;
1485     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1486     ct   = cols;
1487     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1488     for (i=0; i<m; i++) {
1489       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1490       row++;
1491       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1492     }
1493     ierr = PetscFree(ct);CHKERRQ(ierr);
1494     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1495     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1496     /*
1497        Every process must participate in the viewing call since the graphics waits are
1498        synchronized across all processes that share the PetscDraw object
1499     */
1500     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1501     if (!rank) {
1502       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1503       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1504     }
1505     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1506     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1507     ierr = MatDestroy(&A);CHKERRQ(ierr);
1508   }
1509   PetscFunctionReturn(0);
1510 }
1511 
1512 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1513 {
1514   PetscErrorCode ierr;
1515   PetscBool      iascii,isdraw,issocket,isbinary;
1516 
1517   PetscFunctionBegin;
1518   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1519   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1520   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1521   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1522   if (iascii || isdraw || isbinary || issocket) {
1523     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1524   }
1525   PetscFunctionReturn(0);
1526 }
1527 
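/*
   MatSOR_MPIAIJ - only "local" (block Jacobi style) relaxations are supported in parallel.
   Each outer iteration scatters the current solution into the ghost vector lvec, forms the
   modified right-hand side bb1 = bb - B*lvec (B is the off-diagonal block), and then applies the
   requested SOR sweep of the diagonal block A to bb1.  The SOR_EISENSTAT variant is built from a
   local backward sweep, a diagonal correction of the right-hand side, and a local forward sweep.
*/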
1528 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1529 {
1530   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1531   PetscErrorCode ierr;
1532   Vec            bb1 = 0;
1533   PetscBool      hasop;
1534 
1535   PetscFunctionBegin;
1536   if (flag == SOR_APPLY_UPPER) {
1537     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1538     PetscFunctionReturn(0);
1539   }
1540 
1541   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1542     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1543   }
1544 
1545   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1546     if (flag & SOR_ZERO_INITIAL_GUESS) {
1547       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1548       its--;
1549     }
1550 
1551     while (its--) {
1552       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1553       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1554 
1555       /* update rhs: bb1 = bb - B*x */
1556       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1557       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1558 
1559       /* local sweep */
1560       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1561     }
1562   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1563     if (flag & SOR_ZERO_INITIAL_GUESS) {
1564       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1565       its--;
1566     }
1567     while (its--) {
1568       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1569       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1570 
1571       /* update rhs: bb1 = bb - B*x */
1572       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1573       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1574 
1575       /* local sweep */
1576       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1577     }
1578   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1579     if (flag & SOR_ZERO_INITIAL_GUESS) {
1580       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1581       its--;
1582     }
1583     while (its--) {
1584       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1585       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1586 
1587       /* update rhs: bb1 = bb - B*x */
1588       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1589       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1590 
1591       /* local sweep */
1592       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1593     }
1594   } else if (flag & SOR_EISENSTAT) {
1595     Vec xx1;
1596 
1597     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1598     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1599 
1600     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1601     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1602     if (!mat->diag) {
1603       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1604       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1605     }
1606     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1607     if (hasop) {
1608       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1609     } else {
1610       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1611     }
1612     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1613 
1614     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1615 
1616     /* local sweep */
1617     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1618     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1619     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1620   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1621 
1622   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1623 
1624   matin->factorerrortype = mat->A->factorerrortype;
1625   PetscFunctionReturn(0);
1626 }
1627 
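/*
   MatPermute_MPIAIJ - rowp[i] / colp[i] name the original global row/column that should appear
   at global position rstart+i of the permuted matrix.  A PetscSF reduce over that graph inverts
   the permutation, so each process learns the destination (rdest/cdest) of the rows and columns
   it currently owns; a second SF broadcast maps the ghost columns garray[] to their destinations
   (gcdest).  The permuted matrix is preallocated from per-row diagonal/off-diagonal counts and
   then filled with MatSetValues().
*/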
1628 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1629 {
1630   Mat            aA,aB,Aperm;
1631   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1632   PetscScalar    *aa,*ba;
1633   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1634   PetscSF        rowsf,sf;
1635   IS             parcolp = NULL;
1636   PetscBool      done;
1637   PetscErrorCode ierr;
1638 
1639   PetscFunctionBegin;
1640   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1641   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1642   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1643   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1644 
1645   /* Invert row permutation to find out where my rows should go */
1646   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1647   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1648   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1649   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1650   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1651   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1652 
1653   /* Invert column permutation to find out where my columns should go */
1654   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1655   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1656   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1657   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1658   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1659   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1660   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1661 
1662   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1663   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1664   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1665 
1666   /* Find out where my gcols should go */
1667   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1668   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1669   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1670   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1671   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1672   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1673   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1674   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1675 
1676   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1677   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1678   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1679   for (i=0; i<m; i++) {
1680     PetscInt row = rdest[i],rowner;
1681     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1682     for (j=ai[i]; j<ai[i+1]; j++) {
1683       PetscInt cowner,col = cdest[aj[j]];
1684       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1685       if (rowner == cowner) dnnz[i]++;
1686       else onnz[i]++;
1687     }
1688     for (j=bi[i]; j<bi[i+1]; j++) {
1689       PetscInt cowner,col = gcdest[bj[j]];
1690       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1691       if (rowner == cowner) dnnz[i]++;
1692       else onnz[i]++;
1693     }
1694   }
1695   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1696   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1697   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1698   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1699   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1700 
1701   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1702   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1703   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1704   for (i=0; i<m; i++) {
1705     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1706     PetscInt j0,rowlen;
1707     rowlen = ai[i+1] - ai[i];
1708     for (j0=j=0; j<rowlen; j0=j) { /* the repurposed work arrays hold only m entries, so the row is inserted in batches of at most m columns */
1709       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1710       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1711     }
1712     rowlen = bi[i+1] - bi[i];
1713     for (j0=j=0; j<rowlen; j0=j) {
1714       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1715       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1716     }
1717   }
1718   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1719   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1720   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1721   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1722   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1723   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1724   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1725   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1726   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1727   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1728   *B = Aperm;
1729   PetscFunctionReturn(0);
1730 }
1731 
1732 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1733 {
1734   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1735   PetscErrorCode ierr;
1736 
1737   PetscFunctionBegin;
1738   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1739   if (ghosts) *ghosts = aij->garray;
1740   PetscFunctionReturn(0);
1741 }
1742 
1743 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1744 {
1745   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1746   Mat            A    = mat->A,B = mat->B;
1747   PetscErrorCode ierr;
1748   PetscReal      isend[5],irecv[5];
1749 
1750   PetscFunctionBegin;
1751   info->block_size = 1.0;
1752   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1753 
1754   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1755   isend[3] = info->memory;  isend[4] = info->mallocs;
1756 
1757   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1758 
1759   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1760   isend[3] += info->memory;  isend[4] += info->mallocs;
1761   if (flag == MAT_LOCAL) {
1762     info->nz_used      = isend[0];
1763     info->nz_allocated = isend[1];
1764     info->nz_unneeded  = isend[2];
1765     info->memory       = isend[3];
1766     info->mallocs      = isend[4];
1767   } else if (flag == MAT_GLOBAL_MAX) {
1768     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1769 
1770     info->nz_used      = irecv[0];
1771     info->nz_allocated = irecv[1];
1772     info->nz_unneeded  = irecv[2];
1773     info->memory       = irecv[3];
1774     info->mallocs      = irecv[4];
1775   } else if (flag == MAT_GLOBAL_SUM) {
1776     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1777 
1778     info->nz_used      = irecv[0];
1779     info->nz_allocated = irecv[1];
1780     info->nz_unneeded  = irecv[2];
1781     info->memory       = irecv[3];
1782     info->mallocs      = irecv[4];
1783   }
1784   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1785   info->fill_ratio_needed = 0;
1786   info->factor_mallocs    = 0;
1787   PetscFunctionReturn(0);
1788 }
1789 
1790 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1791 {
1792   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1793   PetscErrorCode ierr;
1794 
1795   PetscFunctionBegin;
1796   switch (op) {
1797   case MAT_NEW_NONZERO_LOCATIONS:
1798   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1799   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1800   case MAT_KEEP_NONZERO_PATTERN:
1801   case MAT_NEW_NONZERO_LOCATION_ERR:
1802   case MAT_USE_INODES:
1803   case MAT_IGNORE_ZERO_ENTRIES:
1804     MatCheckPreallocated(A,1);
1805     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1806     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1807     break;
1808   case MAT_ROW_ORIENTED:
1809     MatCheckPreallocated(A,1);
1810     a->roworiented = flg;
1811 
1812     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1813     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1814     break;
1815   case MAT_NEW_DIAGONALS:
1816     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1817     break;
1818   case MAT_IGNORE_OFF_PROC_ENTRIES:
1819     a->donotstash = flg;
1820     break;
1821   case MAT_SPD:
1822     A->spd_set = PETSC_TRUE;
1823     A->spd     = flg;
1824     if (flg) {
1825       A->symmetric                  = PETSC_TRUE;
1826       A->structurally_symmetric     = PETSC_TRUE;
1827       A->symmetric_set              = PETSC_TRUE;
1828       A->structurally_symmetric_set = PETSC_TRUE;
1829     }
1830     break;
1831   case MAT_SYMMETRIC:
1832     MatCheckPreallocated(A,1);
1833     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1834     break;
1835   case MAT_STRUCTURALLY_SYMMETRIC:
1836     MatCheckPreallocated(A,1);
1837     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1838     break;
1839   case MAT_HERMITIAN:
1840     MatCheckPreallocated(A,1);
1841     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1842     break;
1843   case MAT_SYMMETRY_ETERNAL:
1844     MatCheckPreallocated(A,1);
1845     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1846     break;
1847   case MAT_SUBMAT_SINGLEIS:
1848     A->submat_singleis = flg;
1849     break;
1850   case MAT_STRUCTURE_ONLY:
1851     /* The option is handled directly by MatSetOption() */
1852     break;
1853   default:
1854     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1855   }
1856   PetscFunctionReturn(0);
1857 }
1858 
1859 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1860 {
1861   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1862   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1863   PetscErrorCode ierr;
1864   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1865   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1866   PetscInt       *cmap,*idx_p;
1867 
1868   PetscFunctionBegin;
1869   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1870   mat->getrowactive = PETSC_TRUE;
1871 
1872   if (!mat->rowvalues && (idx || v)) {
1873     /*
1874         allocate enough space to hold information from the longest row.
1875     */
1876     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1877     PetscInt   max = 1,tmp;
1878     for (i=0; i<matin->rmap->n; i++) {
1879       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1880       if (max < tmp) max = tmp;
1881     }
1882     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1883   }
1884 
1885   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1886   lrow = row - rstart;
1887 
1888   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1889   if (!v)   {pvA = 0; pvB = 0;}
1890   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1891   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1892   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1893   nztot = nzA + nzB;
1894 
1895   cmap = mat->garray;
1896   if (v  || idx) {
1897     if (nztot) {
1898       /* Sort by increasing column numbers, assuming A and B already sorted */
1899       PetscInt imark = -1;
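      /* imark counts the leading B (off-diagonal) entries whose global column precedes the
         diagonal block, so the merged row is: B columns below cstart, then all A columns,
         then the remaining B columns. */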
1900       if (v) {
1901         *v = v_p = mat->rowvalues;
1902         for (i=0; i<nzB; i++) {
1903           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1904           else break;
1905         }
1906         imark = i;
1907         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1908         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1909       }
1910       if (idx) {
1911         *idx = idx_p = mat->rowindices;
1912         if (imark > -1) {
1913           for (i=0; i<imark; i++) {
1914             idx_p[i] = cmap[cworkB[i]];
1915           }
1916         } else {
1917           for (i=0; i<nzB; i++) {
1918             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1919             else break;
1920           }
1921           imark = i;
1922         }
1923         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1924         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1925       }
1926     } else {
1927       if (idx) *idx = 0;
1928       if (v)   *v   = 0;
1929     }
1930   }
1931   *nz  = nztot;
1932   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1933   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1934   PetscFunctionReturn(0);
1935 }
1936 
1937 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1938 {
1939   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1940 
1941   PetscFunctionBegin;
1942   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1943   aij->getrowactive = PETSC_FALSE;
1944   PetscFunctionReturn(0);
1945 }
1946 
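/*
   MatNorm_MPIAIJ - NORM_FROBENIUS sums |a_ij|^2 over the local A and B blocks and takes the
   square root of the global sum; NORM_1 (max column sum) accumulates |a_ij| into a work array
   of global length cmap->N, sums it across processes, and takes the largest entry;
   NORM_INFINITY (max row sum) takes the largest local row sum and reduces with MPI max.
   NORM_2 is not supported.
*/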
1947 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1948 {
1949   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1950   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1951   PetscErrorCode ierr;
1952   PetscInt       i,j,cstart = mat->cmap->rstart;
1953   PetscReal      sum = 0.0;
1954   MatScalar      *v;
1955 
1956   PetscFunctionBegin;
1957   if (aij->size == 1) {
1958     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1959   } else {
1960     if (type == NORM_FROBENIUS) {
1961       v = amat->a;
1962       for (i=0; i<amat->nz; i++) {
1963         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1964       }
1965       v = bmat->a;
1966       for (i=0; i<bmat->nz; i++) {
1967         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1968       }
1969       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1970       *norm = PetscSqrtReal(*norm);
1971       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1972     } else if (type == NORM_1) { /* max column norm */
1973       PetscReal *tmp,*tmp2;
1974       PetscInt  *jj,*garray = aij->garray;
1975       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1976       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1977       *norm = 0.0;
1978       v     = amat->a; jj = amat->j;
1979       for (j=0; j<amat->nz; j++) {
1980         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1981       }
1982       v = bmat->a; jj = bmat->j;
1983       for (j=0; j<bmat->nz; j++) {
1984         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1985       }
1986       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1987       for (j=0; j<mat->cmap->N; j++) {
1988         if (tmp2[j] > *norm) *norm = tmp2[j];
1989       }
1990       ierr = PetscFree(tmp);CHKERRQ(ierr);
1991       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1992       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1993     } else if (type == NORM_INFINITY) { /* max row norm */
1994       PetscReal ntemp = 0.0;
1995       for (j=0; j<aij->A->rmap->n; j++) {
1996         v   = amat->a + amat->i[j];
1997         sum = 0.0;
1998         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1999           sum += PetscAbsScalar(*v); v++;
2000         }
2001         v = bmat->a + bmat->i[j];
2002         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2003           sum += PetscAbsScalar(*v); v++;
2004         }
2005         if (sum > ntemp) ntemp = sum;
2006       }
2007       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2008       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
2009     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
2010   }
2011   PetscFunctionReturn(0);
2012 }
2013 
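/*
   MatTranspose_MPIAIJ - for MAT_INITIAL_MATRIX the preallocation of the transpose is computed by
   counting, per column, the local nonzeros of the diagonal block (d_nnz) and by reducing the
   ghost-column counts of the off-diagonal block onto their owning processes with a PetscSF
   (o_nnz).  The entries are then inserted with the row and column arguments of MatSetValues()
   swapped, so row i of A becomes column i of the result.
*/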
2014 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2015 {
2016   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2017   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2018   PetscErrorCode ierr;
2019   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
2020   PetscInt       cstart = A->cmap->rstart,ncol;
2021   Mat            B;
2022   MatScalar      *array;
2023 
2024   PetscFunctionBegin;
2025   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2026   ai = Aloc->i; aj = Aloc->j;
2027   bi = Bloc->i; bj = Bloc->j;
2028   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2029     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2030     PetscSFNode          *oloc;
2031     PETSC_UNUSED PetscSF sf;
2032 
2033     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2034     /* compute d_nnz for preallocation */
2035     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2036     for (i=0; i<ai[ma]; i++) {
2037       d_nnz[aj[i]]++;
2038       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2039     }
2040     /* compute local off-diagonal contributions */
2041     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2042     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2043     /* map those to global */
2044     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2045     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2046     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2047     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2048     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2049     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2050     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2051 
2052     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2053     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2054     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2055     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2056     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2057     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2058   } else {
2059     B    = *matout;
2060     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2061     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2062   }
2063 
2064   /* copy over the A part */
2065   array = Aloc->a;
2066   row   = A->rmap->rstart;
2067   for (i=0; i<ma; i++) {
2068     ncol = ai[i+1]-ai[i];
2069     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2070     row++;
2071     array += ncol; aj += ncol;
2072   }
2073   aj = Aloc->j;
2074   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */
2075 
2076   /* copy over the B part */
2077   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2078   array = Bloc->a;
2079   row   = A->rmap->rstart;
2080   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2081   cols_tmp = cols;
2082   for (i=0; i<mb; i++) {
2083     ncol = bi[i+1]-bi[i];
2084     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2085     row++;
2086     array += ncol; cols_tmp += ncol;
2087   }
2088   ierr = PetscFree(cols);CHKERRQ(ierr);
2089 
2090   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2091   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2092   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2093     *matout = B;
2094   } else {
2095     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2096   }
2097   PetscFunctionReturn(0);
2098 }
2099 
2100 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2101 {
2102   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2103   Mat            a    = aij->A,b = aij->B;
2104   PetscErrorCode ierr;
2105   PetscInt       s1,s2,s3;
2106 
2107   PetscFunctionBegin;
2108   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2109   if (rr) {
2110     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2111     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2112     /* Overlap communication with computation. */
2113     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2114   }
2115   if (ll) {
2116     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2117     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2118     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2119   }
2120   /* scale the diagonal block */
2121   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2122 
2123   if (rr) {
2124     /* Do a scatter end and then right scale the off-diagonal block */
2125     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2126     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2127   }
2128   PetscFunctionReturn(0);
2129 }
2130 
2131 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2132 {
2133   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2134   PetscErrorCode ierr;
2135 
2136   PetscFunctionBegin;
2137   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2138   PetscFunctionReturn(0);
2139 }
2140 
2141 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2142 {
2143   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2144   Mat            a,b,c,d;
2145   PetscBool      flg;
2146   PetscErrorCode ierr;
2147 
2148   PetscFunctionBegin;
2149   a = matA->A; b = matA->B;
2150   c = matB->A; d = matB->B;
2151 
2152   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2153   if (flg) {
2154     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2155   }
2156   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2157   PetscFunctionReturn(0);
2158 }
2159 
2160 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2161 {
2162   PetscErrorCode ierr;
2163   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2164   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2165 
2166   PetscFunctionBegin;
2167   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2168   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2169     /* because of the column compression in the off-processor part of the matrix a->B,
2170        the number of columns in a->B and b->B may differ, so MatCopy() cannot be called
2171        directly on the two parts. If need be, a copy more efficient than MatCopy_Basic()
2172        could be provided by first uncompressing the a->B matrices and then copying the
2173        submatrices */
2174     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2175   } else {
2176     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2177     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2178   }
2179   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2180   PetscFunctionReturn(0);
2181 }
2182 
2183 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2184 {
2185   PetscErrorCode ierr;
2186 
2187   PetscFunctionBegin;
2188   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2189   PetscFunctionReturn(0);
2190 }
2191 
2192 /*
2193    Computes the number of nonzeros per row needed for preallocation when X and Y
2194    have different nonzero structure.
2195 */
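/*
   A small illustration of the merge count performed below (not part of the algorithm itself):
   if row i of X has global columns {1,5,9} and row i of Y has global columns {1,3,9}, the two
   sorted lists are merged with duplicates counted once, giving nnz[i] = 4 for the union
   {1,3,5,9}.
*/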
2196 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2197 {
2198   PetscInt       i,j,k,nzx,nzy;
2199 
2200   PetscFunctionBegin;
2201   /* Set the number of nonzeros in the new matrix */
2202   for (i=0; i<m; i++) {
2203     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2204     nzx = xi[i+1] - xi[i];
2205     nzy = yi[i+1] - yi[i];
2206     nnz[i] = 0;
2207     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2208       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2209       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2210       nnz[i]++;
2211     }
2212     for (; k<nzy; k++) nnz[i]++;
2213   }
2214   PetscFunctionReturn(0);
2215 }
2216 
2217 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2218 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2219 {
2220   PetscErrorCode ierr;
2221   PetscInt       m = Y->rmap->N;
2222   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2223   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2224 
2225   PetscFunctionBegin;
2226   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2227   PetscFunctionReturn(0);
2228 }
2229 
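/*
   MatAXPY_MPIAIJ - with SAME_NONZERO_PATTERN, Y += a*X reduces to a BLAS axpy on the stored
   value arrays of the diagonal and off-diagonal blocks; with SUBSET_NONZERO_PATTERN the generic
   MatAXPY_Basic() is used; otherwise a new matrix with the union sparsity pattern is
   preallocated (using the routines above), filled by MatAXPY_BasicWithPreallocation(), and
   swapped into Y with MatHeaderReplace().
*/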
2230 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2231 {
2232   PetscErrorCode ierr;
2233   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2234   PetscBLASInt   bnz,one=1;
2235   Mat_SeqAIJ     *x,*y;
2236 
2237   PetscFunctionBegin;
2238   if (str == SAME_NONZERO_PATTERN) {
2239     PetscScalar alpha = a;
2240     x    = (Mat_SeqAIJ*)xx->A->data;
2241     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2242     y    = (Mat_SeqAIJ*)yy->A->data;
2243     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2244     x    = (Mat_SeqAIJ*)xx->B->data;
2245     y    = (Mat_SeqAIJ*)yy->B->data;
2246     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2247     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2248     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2249   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2250     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2251   } else {
2252     Mat      B;
2253     PetscInt *nnz_d,*nnz_o;
2254     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2255     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2256     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2257     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2258     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2259     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2260     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2261     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2262     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2263     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2264     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2265     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2266     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2267     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2268   }
2269   PetscFunctionReturn(0);
2270 }
2271 
2272 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2273 
2274 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2275 {
2276 #if defined(PETSC_USE_COMPLEX)
2277   PetscErrorCode ierr;
2278   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2279 
2280   PetscFunctionBegin;
2281   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2282   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2283 #else
2284   PetscFunctionBegin;
2285 #endif
2286   PetscFunctionReturn(0);
2287 }
2288 
2289 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2290 {
2291   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2292   PetscErrorCode ierr;
2293 
2294   PetscFunctionBegin;
2295   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2296   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2297   PetscFunctionReturn(0);
2298 }
2299 
2300 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2301 {
2302   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2303   PetscErrorCode ierr;
2304 
2305   PetscFunctionBegin;
2306   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2307   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2308   PetscFunctionReturn(0);
2309 }
2310 
2311 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2312 {
2313   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2314   PetscErrorCode ierr;
2315   PetscInt       i,*idxb = 0;
2316   PetscScalar    *va,*vb;
2317   Vec            vtmp;
2318 
2319   PetscFunctionBegin;
2320   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2321   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2322   if (idx) {
2323     for (i=0; i<A->rmap->n; i++) {
2324       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2325     }
2326   }
2327 
2328   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2329   if (idx) {
2330     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2331   }
2332   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2333   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2334 
2335   for (i=0; i<A->rmap->n; i++) {
2336     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2337       va[i] = vb[i];
2338       if (idx) idx[i] = a->garray[idxb[i]];
2339     }
2340   }
2341 
2342   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2343   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2344   ierr = PetscFree(idxb);CHKERRQ(ierr);
2345   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2346   PetscFunctionReturn(0);
2347 }
2348 
2349 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2350 {
2351   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2352   PetscErrorCode ierr;
2353   PetscInt       i,*idxb = 0;
2354   PetscScalar    *va,*vb;
2355   Vec            vtmp;
2356 
2357   PetscFunctionBegin;
2358   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2359   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2360   if (idx) {
2361     for (i=0; i<A->rmap->n; i++) {
2362       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2363     }
2364   }
2365 
2366   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2367   if (idx) {
2368     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2369   }
2370   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2371   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2372 
2373   for (i=0; i<A->rmap->n; i++) {
2374     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2375       va[i] = vb[i];
2376       if (idx) idx[i] = a->garray[idxb[i]];
2377     }
2378   }
2379 
2380   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2381   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2382   ierr = PetscFree(idxb);CHKERRQ(ierr);
2383   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2384   PetscFunctionReturn(0);
2385 }
2386 
2387 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2388 {
2389   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2390   PetscInt       n      = A->rmap->n;
2391   PetscInt       cstart = A->cmap->rstart;
2392   PetscInt       *cmap  = mat->garray;
2393   PetscInt       *diagIdx, *offdiagIdx;
2394   Vec            diagV, offdiagV;
2395   PetscScalar    *a, *diagA, *offdiagA;
2396   PetscInt       r;
2397   PetscErrorCode ierr;
2398 
2399   PetscFunctionBegin;
2400   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2401   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2402   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2403   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2404   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2405   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2406   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2407   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2408   for (r = 0; r < n; ++r) {
2409     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2410       a[r]   = diagA[r];
2411       idx[r] = cstart + diagIdx[r];
2412     } else {
2413       a[r]   = offdiagA[r];
2414       idx[r] = cmap[offdiagIdx[r]];
2415     }
2416   }
2417   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2418   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2419   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2420   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2421   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2422   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2423   PetscFunctionReturn(0);
2424 }
2425 
2426 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2427 {
2428   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2429   PetscInt       n      = A->rmap->n;
2430   PetscInt       cstart = A->cmap->rstart;
2431   PetscInt       *cmap  = mat->garray;
2432   PetscInt       *diagIdx, *offdiagIdx;
2433   Vec            diagV, offdiagV;
2434   PetscScalar    *a, *diagA, *offdiagA;
2435   PetscInt       r;
2436   PetscErrorCode ierr;
2437 
2438   PetscFunctionBegin;
2439   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2440   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2441   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2442   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2443   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2444   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2445   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2446   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2447   for (r = 0; r < n; ++r) {
2448     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2449       a[r]   = diagA[r];
2450       idx[r] = cstart + diagIdx[r];
2451     } else {
2452       a[r]   = offdiagA[r];
2453       idx[r] = cmap[offdiagIdx[r]];
2454     }
2455   }
2456   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2457   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2458   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2459   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2460   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2461   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2462   PetscFunctionReturn(0);
2463 }
2464 
2465 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2466 {
2467   PetscErrorCode ierr;
2468   Mat            *dummy;
2469 
2470   PetscFunctionBegin;
2471   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2472   *newmat = *dummy;
2473   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2474   PetscFunctionReturn(0);
2475 }
2476 
2477 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2478 {
2479   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2480   PetscErrorCode ierr;
2481 
2482   PetscFunctionBegin;
2483   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2484   A->factorerrortype = a->A->factorerrortype;
2485   PetscFunctionReturn(0);
2486 }
2487 
2488 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2489 {
2490   PetscErrorCode ierr;
2491   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2492 
2493   PetscFunctionBegin;
2494   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2495   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2496   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2497   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2498   PetscFunctionReturn(0);
2499 }
2500 
2501 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2502 {
2503   PetscFunctionBegin;
2504   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2505   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2506   PetscFunctionReturn(0);
2507 }
2508 
2509 /*@
2510    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2511 
2512    Collective on Mat
2513 
2514    Input Parameters:
2515 +    A - the matrix
2516 -    sc - PETSC_TRUE to use the scalable algorithm (the default is PETSC_FALSE)
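
   Options Database Keys:
.    -mat_increase_overlap_scalable <bool> - use the scalable algorithm to compute the overlap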
2517 
2518  Level: advanced
2519 
2520 @*/
2521 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2522 {
2523   PetscErrorCode       ierr;
2524 
2525   PetscFunctionBegin;
2526   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2527   PetscFunctionReturn(0);
2528 }
2529 
2530 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2531 {
2532   PetscErrorCode       ierr;
2533   PetscBool            sc = PETSC_FALSE,flg;
2534 
2535   PetscFunctionBegin;
2536   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2537   ierr = PetscObjectOptionsBegin((PetscObject)A);CHKERRQ(ierr);
2538   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2539   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2540   if (flg) {
2541     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2542   }
2543   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2544   PetscFunctionReturn(0);
2545 }
2546 
2547 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2548 {
2549   PetscErrorCode ierr;
2550   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2551   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2552 
2553   PetscFunctionBegin;
2554   if (!Y->preallocated) {
2555     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2556   } else if (!aij->nz) {
2557     PetscInt nonew = aij->nonew;
2558     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2559     aij->nonew = nonew;
2560   }
2561   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2562   PetscFunctionReturn(0);
2563 }
2564 
2565 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2566 {
2567   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2568   PetscErrorCode ierr;
2569 
2570   PetscFunctionBegin;
2571   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2572   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2573   if (d) {
2574     PetscInt rstart;
2575     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2576     *d += rstart;
2578   }
2579   PetscFunctionReturn(0);
2580 }
2581 
2582 
2583 /* -------------------------------------------------------------------*/
2584 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2585                                        MatGetRow_MPIAIJ,
2586                                        MatRestoreRow_MPIAIJ,
2587                                        MatMult_MPIAIJ,
2588                                 /* 4*/ MatMultAdd_MPIAIJ,
2589                                        MatMultTranspose_MPIAIJ,
2590                                        MatMultTransposeAdd_MPIAIJ,
2591                                        0,
2592                                        0,
2593                                        0,
2594                                 /*10*/ 0,
2595                                        0,
2596                                        0,
2597                                        MatSOR_MPIAIJ,
2598                                        MatTranspose_MPIAIJ,
2599                                 /*15*/ MatGetInfo_MPIAIJ,
2600                                        MatEqual_MPIAIJ,
2601                                        MatGetDiagonal_MPIAIJ,
2602                                        MatDiagonalScale_MPIAIJ,
2603                                        MatNorm_MPIAIJ,
2604                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2605                                        MatAssemblyEnd_MPIAIJ,
2606                                        MatSetOption_MPIAIJ,
2607                                        MatZeroEntries_MPIAIJ,
2608                                 /*24*/ MatZeroRows_MPIAIJ,
2609                                        0,
2610                                        0,
2611                                        0,
2612                                        0,
2613                                 /*29*/ MatSetUp_MPIAIJ,
2614                                        0,
2615                                        0,
2616                                        MatGetDiagonalBlock_MPIAIJ,
2617                                        0,
2618                                 /*34*/ MatDuplicate_MPIAIJ,
2619                                        0,
2620                                        0,
2621                                        0,
2622                                        0,
2623                                 /*39*/ MatAXPY_MPIAIJ,
2624                                        MatCreateSubMatrices_MPIAIJ,
2625                                        MatIncreaseOverlap_MPIAIJ,
2626                                        MatGetValues_MPIAIJ,
2627                                        MatCopy_MPIAIJ,
2628                                 /*44*/ MatGetRowMax_MPIAIJ,
2629                                        MatScale_MPIAIJ,
2630                                        MatShift_MPIAIJ,
2631                                        MatDiagonalSet_MPIAIJ,
2632                                        MatZeroRowsColumns_MPIAIJ,
2633                                 /*49*/ MatSetRandom_MPIAIJ,
2634                                        0,
2635                                        0,
2636                                        0,
2637                                        0,
2638                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2639                                        0,
2640                                        MatSetUnfactored_MPIAIJ,
2641                                        MatPermute_MPIAIJ,
2642                                        0,
2643                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2644                                        MatDestroy_MPIAIJ,
2645                                        MatView_MPIAIJ,
2646                                        0,
2647                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2648                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2649                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2650                                        0,
2651                                        0,
2652                                        0,
2653                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2654                                        MatGetRowMinAbs_MPIAIJ,
2655                                        0,
2656                                        0,
2657                                        0,
2658                                        0,
2659                                 /*75*/ MatFDColoringApply_AIJ,
2660                                        MatSetFromOptions_MPIAIJ,
2661                                        0,
2662                                        0,
2663                                        MatFindZeroDiagonals_MPIAIJ,
2664                                 /*80*/ 0,
2665                                        0,
2666                                        0,
2667                                 /*83*/ MatLoad_MPIAIJ,
2668                                        MatIsSymmetric_MPIAIJ,
2669                                        0,
2670                                        0,
2671                                        0,
2672                                        0,
2673                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2674                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2675                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2676                                        MatPtAP_MPIAIJ_MPIAIJ,
2677                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2678                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2679                                        0,
2680                                        0,
2681                                        0,
2682                                        0,
2683                                 /*99*/ 0,
2684                                        0,
2685                                        0,
2686                                        MatConjugate_MPIAIJ,
2687                                        0,
2688                                 /*104*/MatSetValuesRow_MPIAIJ,
2689                                        MatRealPart_MPIAIJ,
2690                                        MatImaginaryPart_MPIAIJ,
2691                                        0,
2692                                        0,
2693                                 /*109*/0,
2694                                        0,
2695                                        MatGetRowMin_MPIAIJ,
2696                                        0,
2697                                        MatMissingDiagonal_MPIAIJ,
2698                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2699                                        0,
2700                                        MatGetGhosts_MPIAIJ,
2701                                        0,
2702                                        0,
2703                                 /*119*/0,
2704                                        0,
2705                                        0,
2706                                        0,
2707                                        MatGetMultiProcBlock_MPIAIJ,
2708                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2709                                        MatGetColumnNorms_MPIAIJ,
2710                                        MatInvertBlockDiagonal_MPIAIJ,
2711                                        0,
2712                                        MatCreateSubMatricesMPI_MPIAIJ,
2713                                 /*129*/0,
2714                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2715                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2716                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2717                                        0,
2718                                 /*134*/0,
2719                                        0,
2720                                        MatRARt_MPIAIJ_MPIAIJ,
2721                                        0,
2722                                        0,
2723                                 /*139*/MatSetBlockSizes_MPIAIJ,
2724                                        0,
2725                                        0,
2726                                        MatFDColoringSetUp_MPIXAIJ,
2727                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2728                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2729 };
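
/*
   Illustrative note (a sketch, not part of the dispatch machinery): the table above fills in Mat->ops
   for MATMPIAIJ, so a generic call such as

     ierr = MatMult(A,x,y);CHKERRQ(ierr);

   ends up invoking (*A->ops->mult)(A,x,y), i.e. MatMult_MPIAIJ().  Slots left as 0 either fall back to
   a default implementation or cause the generic routine to report that the operation is unsupported
   for this matrix type.
*/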
2730 
2731 /* ----------------------------------------------------------------------------------------*/
2732 
2733 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2734 {
2735   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2736   PetscErrorCode ierr;
2737 
2738   PetscFunctionBegin;
2739   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2740   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2741   PetscFunctionReturn(0);
2742 }
2743 
2744 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2745 {
2746   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2747   PetscErrorCode ierr;
2748 
2749   PetscFunctionBegin;
2750   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2751   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2752   PetscFunctionReturn(0);
2753 }
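
/*
   Caller-side usage of the two routines above (a sketch; mat is assumed to be an assembled MATMPIAIJ
   matrix whose nonzero pattern does not change while the values are stashed):

     ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);
       ... the numerical values of mat are changed, for example by MatZeroRows() in a solver ...
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);
*/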
2754 
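/*
   Typical caller-side sequence that reaches the preallocation routine below (a sketch; M, N and the
   per-row estimates are illustrative placeholders):

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);

   Here 5 is the estimated number of nonzeros per row in the diagonal block and 2 in the
   off-diagonal block; exact per-row counts can be supplied through d_nnz and o_nnz instead.
*/
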
2755 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2756 {
2757   Mat_MPIAIJ     *b;
2758   PetscErrorCode ierr;
2759 
2760   PetscFunctionBegin;
2761   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2762   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2763   b = (Mat_MPIAIJ*)B->data;
2764 
2765 #if defined(PETSC_USE_CTABLE)
2766   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2767 #else
2768   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2769 #endif
2770   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2771   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2772   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2773 
2774   /* Because the B matrix will have been resized we simply destroy it and create a new one each time */
2775   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2776   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2777   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2778   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2779   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2780   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2781 
2782   if (!B->preallocated) {
2783     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2784     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2785     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2786     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2787     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2788   }
2789 
2790   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2791   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2792   B->preallocated  = PETSC_TRUE;
2793   B->was_assembled = PETSC_FALSE;
2794   B->assembled     = PETSC_FALSE;
2795   PetscFunctionReturn(0);
2796 }
2797 
2798 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2799 {
2800   Mat_MPIAIJ     *b;
2801   PetscErrorCode ierr;
2802 
2803   PetscFunctionBegin;
2804   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2805   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2806   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2807   b = (Mat_MPIAIJ*)B->data;
2808 
2809 #if defined(PETSC_USE_CTABLE)
2810   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2811 #else
2812   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2813 #endif
2814   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2815   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2816   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2817 
2818   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2819   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2820   B->preallocated  = PETSC_TRUE;
2821   B->was_assembled = PETSC_FALSE;
2822   B->assembled = PETSC_FALSE;
2823   PetscFunctionReturn(0);
2824 }
2825 
2826 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2827 {
2828   Mat            mat;
2829   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2830   PetscErrorCode ierr;
2831 
2832   PetscFunctionBegin;
2833   *newmat = 0;
2834   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2835   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2836   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2837   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2838   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2839   a       = (Mat_MPIAIJ*)mat->data;
2840 
2841   mat->factortype   = matin->factortype;
2842   mat->assembled    = PETSC_TRUE;
2843   mat->insertmode   = NOT_SET_VALUES;
2844   mat->preallocated = PETSC_TRUE;
2845 
2846   a->size         = oldmat->size;
2847   a->rank         = oldmat->rank;
2848   a->donotstash   = oldmat->donotstash;
2849   a->roworiented  = oldmat->roworiented;
2850   a->rowindices   = 0;
2851   a->rowvalues    = 0;
2852   a->getrowactive = PETSC_FALSE;
2853 
2854   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2855   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2856 
2857   if (oldmat->colmap) {
2858 #if defined(PETSC_USE_CTABLE)
2859     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2860 #else
2861     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2862     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2863     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2864 #endif
2865   } else a->colmap = 0;
2866   if (oldmat->garray) {
2867     PetscInt len;
2868     len  = oldmat->B->cmap->n;
2869     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2870     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2871     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2872   } else a->garray = 0;
2873 
2874   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2875   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2876   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2877   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2878 
2879   if (oldmat->Mvctx_mpi1) {
2880     ierr    = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2881     ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2882   }
2883 
2884   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2885   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2886   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2887   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2888   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2889   *newmat = mat;
2890   PetscFunctionReturn(0);
2891 }
2892 
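/*
   How this loader is typically reached (a sketch; "matrix.dat" is a placeholder for a file previously
   written with MatView() on a binary viewer):

     Mat         A;
     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/
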
2893 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2894 {
2895   PetscScalar    *vals,*svals;
2896   MPI_Comm       comm;
2897   PetscErrorCode ierr;
2898   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2899   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2900   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2901   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2902   PetscInt       cend,cstart,n,*rowners;
2903   int            fd;
2904   PetscInt       bs = newMat->rmap->bs;
2905 
2906   PetscFunctionBegin;
2907   /* force binary viewer to load .info file if it has not yet done so */
2908   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2909   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2910   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2911   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2912   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2913   if (!rank) {
2914     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2915     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2915     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
2916     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2917   }
2918 
2919   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2920   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2921   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2922   if (bs < 0) bs = 1;
2923 
2924   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2925   M    = header[1]; N = header[2];
2926 
2927   /* If global sizes are set, check if they are consistent with that given in the file */
2928   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2929   if (newMat->cmap->N >= 0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2930 
2931   /* determine ownership of all (block) rows */
2932   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2933   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2934   else m = newMat->rmap->n; /* Set by user */
2935 
2936   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2937   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2938 
2939   /* First process needs enough room for process with most rows */
2940   if (!rank) {
2941     mmax = rowners[1];
2942     for (i=2; i<=size; i++) {
2943       mmax = PetscMax(mmax, rowners[i]);
2944     }
2945   } else mmax = -1;             /* unused, but compilers complain */
2946 
2947   rowners[0] = 0;
2948   for (i=2; i<=size; i++) {
2949     rowners[i] += rowners[i-1];
2950   }
2951   rstart = rowners[rank];
2952   rend   = rowners[rank+1];
2953 
2954   /* distribute row lengths to all processors */
2955   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2956   if (!rank) {
2957     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2958     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2959     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2960     for (j=0; j<m; j++) {
2961       procsnz[0] += ourlens[j];
2962     }
2963     for (i=1; i<size; i++) {
2964       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2965       /* calculate the number of nonzeros on each processor */
2966       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2967         procsnz[i] += rowlengths[j];
2968       }
2969       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2970     }
2971     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2972   } else {
2973     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2974   }
2975 
2976   if (!rank) {
2977     /* determine max buffer needed and allocate it */
2978     maxnz = 0;
2979     for (i=0; i<size; i++) {
2980       maxnz = PetscMax(maxnz,procsnz[i]);
2981     }
2982     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2983 
2984     /* read in my part of the matrix column indices  */
2985     nz   = procsnz[0];
2986     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2987     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2988 
2989     /* read in everyone else's rows and ship them off */
2990     for (i=1; i<size; i++) {
2991       nz   = procsnz[i];
2992       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2993       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2994     }
2995     ierr = PetscFree(cols);CHKERRQ(ierr);
2996   } else {
2997     /* determine buffer space needed for message */
2998     nz = 0;
2999     for (i=0; i<m; i++) {
3000       nz += ourlens[i];
3001     }
3002     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3003 
3004     /* receive message of column indices */
3005     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3006   }
3007 
3008   /* determine column ownership if matrix is not square */
3009   if (N != M) {
3010     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3011     else n = newMat->cmap->n;
3012     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3013     cstart = cend - n;
3014   } else {
3015     cstart = rstart;
3016     cend   = rend;
3017     n      = cend - cstart;
3018   }
3019 
3020   /* loop over local rows, determining number of off-diagonal entries */
3021   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3022   jj   = 0;
3023   for (i=0; i<m; i++) {
3024     for (j=0; j<ourlens[i]; j++) {
3025       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3026       jj++;
3027     }
3028   }
3029 
3030   for (i=0; i<m; i++) {
3031     ourlens[i] -= offlens[i];
3032   }
3033   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3034 
3035   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3036 
3037   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3038 
3039   for (i=0; i<m; i++) {
3040     ourlens[i] += offlens[i];
3041   }
3042 
3043   if (!rank) {
3044     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3045 
3046     /* read in my part of the matrix numerical values  */
3047     nz   = procsnz[0];
3048     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3049 
3050     /* insert into matrix */
3051     jj      = rstart;
3052     smycols = mycols;
3053     svals   = vals;
3054     for (i=0; i<m; i++) {
3055       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3056       smycols += ourlens[i];
3057       svals   += ourlens[i];
3058       jj++;
3059     }
3060 
3061     /* read in other processors and ship out */
3062     for (i=1; i<size; i++) {
3063       nz   = procsnz[i];
3064       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3065       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3066     }
3067     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3068   } else {
3069     /* receive numeric values */
3070     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3071 
3072     /* receive message of values */
3073     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3074 
3075     /* insert into matrix */
3076     jj      = rstart;
3077     smycols = mycols;
3078     svals   = vals;
3079     for (i=0; i<m; i++) {
3080       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3081       smycols += ourlens[i];
3082       svals   += ourlens[i];
3083       jj++;
3084     }
3085   }
3086   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3087   ierr = PetscFree(vals);CHKERRQ(ierr);
3088   ierr = PetscFree(mycols);CHKERRQ(ierr);
3089   ierr = PetscFree(rowners);CHKERRQ(ierr);
3090   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3091   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3092   PetscFunctionReturn(0);
3093 }
3094 
3095 /* Not scalable because of ISAllGather() unless getting all columns. */
3096 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3097 {
3098   PetscErrorCode ierr;
3099   IS             iscol_local;
3100   PetscBool      isstride;
3101   PetscMPIInt    lisstride=0,gisstride;
3102 
3103   PetscFunctionBegin;
3104   /* check if we are grabbing all columns*/
3105   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3106 
3107   if (isstride) {
3108     PetscInt  start,len,mstart,mlen;
3109     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3110     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3111     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3112     if (mstart == start && mlen-mstart == len) lisstride = 1;
3113   }
3114 
3115   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3116   if (gisstride) {
3117     PetscInt N;
3118     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3119     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3120     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3121     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3122   } else {
3123     PetscInt cbs;
3124     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3125     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3126     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3127   }
3128 
3129   *isseq = iscol_local;
3130   PetscFunctionReturn(0);
3131 }
3132 
3133 /*
3134  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local with the global size of iscol
3135  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3136 
3137  Input Parameters:
3138    mat - matrix
3139    isrow - parallel row index set; its local indices are a subset of the local rows of mat,
3140            i.e., mat->rstart <= isrow[i] < mat->rend
3141    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3142            i.e., mat->cstart <= iscol[i] < mat->cend
3143  Output Parameters:
3144    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3145    iscol_o - sequential column index set for retrieving mat->B
3146    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3147  */
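/*
   A small hypothetical example of the outputs described above: suppose this process owns columns 4..7
   of mat, its local piece of iscol selects global columns {4,6}, other processes' pieces select the
   global columns 1 and 9, and the off-diagonal block mat->B references exactly those two off-process
   columns.  Then iscol_d = {0,2} (the positions of 4 and 6 among the locally owned columns), iscol_o
   contains the local columns of B corresponding to global columns 1 and 9, and garray[i] gives, for
   each entry of iscol_o, its position within the concatenated parallel iscol.
*/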
3148 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3149 {
3150   PetscErrorCode ierr;
3151   Vec            x,cmap;
3152   const PetscInt *is_idx;
3153   PetscScalar    *xarray,*cmaparray;
3154   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3155   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3156   Mat            B=a->B;
3157   Vec            lvec=a->lvec,lcmap;
3158   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3159   MPI_Comm       comm;
3160   VecScatter     Mvctx=a->Mvctx;
3161 
3162   PetscFunctionBegin;
3163   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3164   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3165 
3166   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3167   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3168   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3169   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3170   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3171 
3172   /* Get start indices */
3173   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3174   isstart -= ncols;
3175   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3176 
3177   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3178   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3179   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3180   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3181   for (i=0; i<ncols; i++) {
3182     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3183     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3184     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3185   }
3186   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3187   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3188   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3189 
3190   /* Get iscol_d */
3191   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3192   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3193   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3194 
3195   /* Get isrow_d */
3196   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3197   rstart = mat->rmap->rstart;
3198   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3199   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3200   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3201   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3202 
3203   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3204   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3205   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3206 
3207   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3208   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3209   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3210 
3211   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3212 
3213   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3214   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3215 
3216   /* (3) create sequential iscol_o (a subset of iscol) and garray */
3217   /* off-process column indices */
3218   count = 0;
3219   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3220   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3221 
3222   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3223   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3224   for (i=0; i<Bn; i++) {
3225     if (PetscRealPart(xarray[i]) > -1.0) {
3226       idx[count]     = i;                   /* local column index in off-diagonal part B */
3227       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3228       count++;
3229     }
3230   }
3231   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3232   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3233 
3234   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3235   /* cannot ensure iscol_o has same blocksize as iscol! */
3236 
3237   ierr = PetscFree(idx);CHKERRQ(ierr);
3238   *garray = cmap1;
3239 
3240   ierr = VecDestroy(&x);CHKERRQ(ierr);
3241   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3242   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3243   PetscFunctionReturn(0);
3244 }
3245 
3246 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3247 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3248 {
3249   PetscErrorCode ierr;
3250   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3251   Mat            M = NULL;
3252   MPI_Comm       comm;
3253   IS             iscol_d,isrow_d,iscol_o;
3254   Mat            Asub = NULL,Bsub = NULL;
3255   PetscInt       n;
3256 
3257   PetscFunctionBegin;
3258   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3259 
3260   if (call == MAT_REUSE_MATRIX) {
3261     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3262     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3263     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3264 
3265     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3266     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3267 
3268     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3269     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3270 
3271     /* Update diagonal and off-diagonal portions of submat */
3272     asub = (Mat_MPIAIJ*)(*submat)->data;
3273     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3274     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3275     if (n) {
3276       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3277     }
3278     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3279     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3280 
3281   } else { /* call == MAT_INITIAL_MATRIX */
3282     const PetscInt *garray;
3283     PetscInt        BsubN;
3284 
3285     /* Create isrow_d, iscol_d, iscol_o and garray */
3286     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3287 
3288     /* Create local submatrices Asub and Bsub */
3289     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3290     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3291 
3292     /* Create submatrix M */
3293     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3294 
3295     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3296     asub = (Mat_MPIAIJ*)M->data;
3297 
3298     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3299     n = asub->B->cmap->N;
3300     if (BsubN > n) {
3301       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3302       const PetscInt *idx;
3303       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3304       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3305 
3306       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3307       j = 0;
3308       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3309       for (i=0; i<n; i++) {
3310         if (j >= BsubN) break;
3311         while (subgarray[i] > garray[j]) j++;
3312 
3313         if (subgarray[i] == garray[j]) {
3314           idx_new[i] = idx[j++];
3315         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3316       }
3317       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3318 
3319       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3320       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3321 
3322     } else if (BsubN < n) {
3323       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Number of columns of Bsub (%D) cannot be smaller than that of B (%D)",BsubN,asub->B->cmap->N);
3324     }
3325 
3326     ierr = PetscFree(garray);CHKERRQ(ierr);
3327     *submat = M;
3328 
3329     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3330     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3331     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3332 
3333     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3334     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3335 
3336     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3337     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3338   }
3339   PetscFunctionReturn(0);
3340 }
3341 
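/*
   Caller-side view of the dispatcher below (a sketch; isrow and iscol are assumed to be index sets the
   caller has already created):

     Mat sub;
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
       ... the values of A change but its nonzero pattern does not ...
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
*/
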
3342 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3343 {
3344   PetscErrorCode ierr;
3345   IS             iscol_local=NULL,isrow_d;
3346   PetscInt       csize;
3347   PetscInt       n,i,j,start,end;
3348   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3349   MPI_Comm       comm;
3350 
3351   PetscFunctionBegin;
3352   /* If isrow has same processor distribution as mat,
3353      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3354   if (call == MAT_REUSE_MATRIX) {
3355     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3356     if (isrow_d) {
3357       sameRowDist  = PETSC_TRUE;
3358       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3359     } else {
3360       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3361       if (iscol_local) {
3362         sameRowDist  = PETSC_TRUE;
3363         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3364       }
3365     }
3366   } else {
3367     /* Check if isrow has same processor distribution as mat */
3368     sameDist[0] = PETSC_FALSE;
3369     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3370     if (!n) {
3371       sameDist[0] = PETSC_TRUE;
3372     } else {
3373       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3374       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3375       if (i >= start && j < end) {
3376         sameDist[0] = PETSC_TRUE;
3377       }
3378     }
3379 
3380     /* Check if iscol has same processor distribution as mat */
3381     sameDist[1] = PETSC_FALSE;
3382     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3383     if (!n) {
3384       sameDist[1] = PETSC_TRUE;
3385     } else {
3386       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3387       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3388       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3389     }
3390 
3391     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3392     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3393     sameRowDist = tsameDist[0];
3394   }
3395 
3396   if (sameRowDist) {
3397     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3398       /* isrow and iscol have same processor distribution as mat */
3399       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3400       PetscFunctionReturn(0);
3401     } else { /* sameRowDist */
3402       /* isrow has same processor distribution as mat */
3403       if (call == MAT_INITIAL_MATRIX) {
3404         PetscBool sorted;
3405         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3406         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3407         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3408         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i);
3409 
3410         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3411         if (sorted) {
3412           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3413           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3414           PetscFunctionReturn(0);
3415         }
3416       } else { /* call == MAT_REUSE_MATRIX */
3417         IS    iscol_sub;
3418         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3419         if (iscol_sub) {
3420           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3421           PetscFunctionReturn(0);
3422         }
3423       }
3424     }
3425   }
3426 
3427   /* General case: iscol -> iscol_local which has global size of iscol */
3428   if (call == MAT_REUSE_MATRIX) {
3429     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3430     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3431   } else {
3432     if (!iscol_local) {
3433       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3434     }
3435   }
3436 
3437   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3438   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3439 
3440   if (call == MAT_INITIAL_MATRIX) {
3441     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3442     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3443   }
3444   PetscFunctionReturn(0);
3445 }
3446 
3447 /*@C
3448      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3449          and "off-diagonal" parts of the matrix in CSR format.
3450 
3451    Collective on MPI_Comm
3452 
3453    Input Parameters:
3454 +  comm - MPI communicator
3455 .  A - "diagonal" portion of matrix
3456 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3457 -  garray - global index of B columns
3458 
3459    Output Parameter:
3460 .   mat - the matrix, with input A as its local diagonal matrix

3461    Level: advanced
3462 
3463    Notes:
3464        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3465        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3466 
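   Example usage (a sketch; Aloc and Bloc are assumed to be process-local MATSEQAIJ matrices with the
   same number of rows, and cols[] holds the global column index of each column of Bloc):
.vb
     Mat C;
     ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Aloc,Bloc,cols,&C);CHKERRQ(ierr);
.ve
   After the call Aloc is owned by C and Bloc has been destroyed, so only C may be used.
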
3467 .seealso: MatCreateMPIAIJWithSplitArrays()
3468 @*/
3469 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3470 {
3471   PetscErrorCode ierr;
3472   Mat_MPIAIJ     *maij;
3473   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3474   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3475   PetscScalar    *oa=b->a;
3476   Mat            Bnew;
3477   PetscInt       m,n,N;
3478 
3479   PetscFunctionBegin;
3480   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3481   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3482   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3483   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3484   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3485   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3486 
3487   /* Get global columns of mat */
3488   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3489 
3490   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3491   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3492   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3493   maij = (Mat_MPIAIJ*)(*mat)->data;
3494 
3495   (*mat)->preallocated = PETSC_TRUE;
3496 
3497   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3498   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3499 
3500   /* Set A as diagonal portion of *mat */
3501   maij->A = A;
3502 
3503   nz = oi[m];
3504   for (i=0; i<nz; i++) {
3505     col   = oj[i];
3506     oj[i] = garray[col];
3507   }
3508 
3509    /* Set Bnew as off-diagonal portion of *mat */
3510   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3511   bnew        = (Mat_SeqAIJ*)Bnew->data;
3512   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3513   maij->B     = Bnew;
3514 
3515   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3516 
3517   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3518   b->free_a       = PETSC_FALSE;
3519   b->free_ij      = PETSC_FALSE;
3520   ierr = MatDestroy(&B);CHKERRQ(ierr);
3521 
3522   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3523   bnew->free_a       = PETSC_TRUE;
3524   bnew->free_ij      = PETSC_TRUE;
3525 
3526   /* condense columns of maij->B */
3527   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3528   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3529   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3530   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3531   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3532   PetscFunctionReturn(0);
3533 }
3534 
3535 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3536 
3537 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3538 {
3539   PetscErrorCode ierr;
3540   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3541   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3542   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3543   Mat            M,Msub,B=a->B;
3544   MatScalar      *aa;
3545   Mat_SeqAIJ     *aij;
3546   PetscInt       *garray = a->garray,*colsub,Ncols;
3547   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3548   IS             iscol_sub,iscmap;
3549   const PetscInt *is_idx,*cmap;
3550   PetscBool      allcolumns=PETSC_FALSE;
3551   MPI_Comm       comm;
3552 
3553   PetscFunctionBegin;
3554   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3555 
3556   if (call == MAT_REUSE_MATRIX) {
3557     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3558     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3559     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3560 
3561     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3562     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3563 
3564     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3565     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3566 
3567     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3568 
3569   } else { /* call == MAT_INITIAL_MATRIX */
3570     PetscBool flg;
3571 
3572     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3573     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3574 
3575     /* (1) iscol -> nonscalable iscol_local */
3576     /* Check for special case: each processor gets entire matrix columns */
3577     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3578     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3579     if (allcolumns) {
3580       iscol_sub = iscol_local;
3581       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3582       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3583 
3584     } else {
3585       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local to be sorted; it can have duplicate indices */
3586       PetscInt *idx,*cmap1,k;
3587       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3588       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3589       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3590       count = 0;
3591       k     = 0;
3592       for (i=0; i<Ncols; i++) {
3593         j = is_idx[i];
3594         if (j >= cstart && j < cend) {
3595           /* diagonal part of mat */
3596           idx[count]     = j;
3597           cmap1[count++] = i; /* column index in submat */
3598         } else if (Bn) {
3599           /* off-diagonal part of mat */
3600           if (j == garray[k]) {
3601             idx[count]     = j;
3602             cmap1[count++] = i;  /* column index in submat */
3603           } else if (j > garray[k]) {
3604             while (j > garray[k] && k < Bn-1) k++;
3605             if (j == garray[k]) {
3606               idx[count]     = j;
3607               cmap1[count++] = i; /* column index in submat */
3608             }
3609           }
3610         }
3611       }
3612       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3613 
3614       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3615       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3616       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3617 
3618       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3619     }
3620 
3621     /* (3) Create sequential Msub */
3622     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3623   }
3624 
3625   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3626   aij  = (Mat_SeqAIJ*)(Msub)->data;
3627   ii   = aij->i;
3628   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3629 
3630   /*
3631       m - number of local rows
3632       Ncols - number of columns (same on all processors)
3633       rstart - first row in new global matrix generated
3634   */
3635   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3636 
3637   if (call == MAT_INITIAL_MATRIX) {
3638     /* (4) Create parallel newmat */
3639     PetscMPIInt    rank,size;
3640     PetscInt       csize;
3641 
3642     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3643     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3644 
3645     /*
3646         Determine the number of non-zeros in the diagonal and off-diagonal
3647         portions of the matrix in order to do correct preallocation
3648     */
3649 
3650     /* first get start and end of "diagonal" columns */
3651     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3652     if (csize == PETSC_DECIDE) {
3653       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3654       if (mglobal == Ncols) { /* square matrix */
3655         nlocal = m;
3656       } else {
3657         nlocal = Ncols/size + ((Ncols % size) > rank);
3658       }
3659     } else {
3660       nlocal = csize;
3661     }
3662     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3663     rstart = rend - nlocal;
3664     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3665 
3666     /* next, compute all the lengths */
3667     jj    = aij->j;
3668     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3669     olens = dlens + m;
3670     for (i=0; i<m; i++) {
3671       jend = ii[i+1] - ii[i];
3672       olen = 0;
3673       dlen = 0;
3674       for (j=0; j<jend; j++) {
3675         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3676         else dlen++;
3677         jj++;
3678       }
3679       olens[i] = olen;
3680       dlens[i] = dlen;
3681     }
3682 
3683     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3684     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3685 
3686     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3687     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3688     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3689     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3690     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3691     ierr = PetscFree(dlens);CHKERRQ(ierr);
3692 
3693   } else { /* call == MAT_REUSE_MATRIX */
3694     M    = *newmat;
3695     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3696     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3697     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3698     /*
3699          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3700        rather than the slower MatSetValues().
3701     */
3702     M->was_assembled = PETSC_TRUE;
3703     M->assembled     = PETSC_FALSE;
3704   }
3705 
3706   /* (5) Set values of Msub to *newmat */
3707   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3708   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3709 
3710   jj   = aij->j;
3711   aa   = aij->a;
3712   for (i=0; i<m; i++) {
3713     row = rstart + i;
3714     nz  = ii[i+1] - ii[i];
3715     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3716     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3717     jj += nz; aa += nz;
3718   }
3719   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3720 
3721   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3722   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3723 
3724   ierr = PetscFree(colsub);CHKERRQ(ierr);
3725 
3726   /* save Msub, iscol_sub and iscmap used in processor for next request */
3727   if (call ==  MAT_INITIAL_MATRIX) {
3728     *newmat = M;
3729     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3730     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3731 
3732     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3733     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3734 
3735     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3736     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3737 
3738     if (iscol_local) {
3739       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3740       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3741     }
3742   }
3743   PetscFunctionReturn(0);
3744 }
3745 
3746 /*
3747     Not great since it makes two copies of the submatrix: first a SeqAIJ copy on each process,
3748   and then the final result by concatenating the local matrices.  Writing it directly would be
3749   much like MatCreateSubMatrices_MPIAIJ().
3750 
3751   Note: This requires a sequential iscol with all indices.
3752 */
3753 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3754 {
3755   PetscErrorCode ierr;
3756   PetscMPIInt    rank,size;
3757   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3758   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3759   Mat            M,Mreuse;
3760   MatScalar      *aa,*vwork;
3761   MPI_Comm       comm;
3762   Mat_SeqAIJ     *aij;
3763   PetscBool      colflag,allcolumns=PETSC_FALSE;
3764 
3765   PetscFunctionBegin;
3766   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3767   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3768   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3769 
3770   /* Check for special case: each processor gets entire matrix columns */
3771   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3772   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3773   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3774 
3775   if (call ==  MAT_REUSE_MATRIX) {
3776     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3777     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3778     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3779   } else {
3780     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3781   }
3782 
3783   /*
3784       m - number of local rows
3785       n - number of columns (same on all processors)
3786       rstart - first row in new global matrix generated
3787   */
3788   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3789   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3790   if (call == MAT_INITIAL_MATRIX) {
3791     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3792     ii  = aij->i;
3793     jj  = aij->j;
3794 
3795     /*
3796         Determine the number of non-zeros in the diagonal and off-diagonal
3797         portions of the matrix in order to do correct preallocation
3798     */
3799 
3800     /* first get start and end of "diagonal" columns */
3801     if (csize == PETSC_DECIDE) {
3802       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3803       if (mglobal == n) { /* square matrix */
3804         nlocal = m;
3805       } else {
3806         nlocal = n/size + ((n % size) > rank);
3807       }
3808     } else {
3809       nlocal = csize;
3810     }
3811     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3812     rstart = rend - nlocal;
3813     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3814 
3815     /* next, compute all the lengths */
3816     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3817     olens = dlens + m;
3818     for (i=0; i<m; i++) {
3819       jend = ii[i+1] - ii[i];
3820       olen = 0;
3821       dlen = 0;
3822       for (j=0; j<jend; j++) {
3823         if (*jj < rstart || *jj >= rend) olen++;
3824         else dlen++;
3825         jj++;
3826       }
3827       olens[i] = olen;
3828       dlens[i] = dlen;
3829     }
3830     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3831     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3832     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3833     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3834     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3835     ierr = PetscFree(dlens);CHKERRQ(ierr);
3836   } else {
3837     PetscInt ml,nl;
3838 
3839     M    = *newmat;
3840     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3841     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3842     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3843     /*
3844          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3845        rather than the slower MatSetValues().
3846     */
3847     M->was_assembled = PETSC_TRUE;
3848     M->assembled     = PETSC_FALSE;
3849   }
3850   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3851   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3852   ii   = aij->i;
3853   jj   = aij->j;
3854   aa   = aij->a;
3855   for (i=0; i<m; i++) {
3856     row   = rstart + i;
3857     nz    = ii[i+1] - ii[i];
3858     cwork = jj;     jj += nz;
3859     vwork = aa;     aa += nz;
3860     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3861   }
3862 
3863   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3864   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3865   *newmat = M;
3866 
3867   /* save submatrix used in processor for next request */
3868   if (call ==  MAT_INITIAL_MATRIX) {
3869     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3870     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3871   }
3872   PetscFunctionReturn(0);
3873 }
3874 
3875 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3876 {
3877   PetscInt       m,cstart, cend,j,nnz,i,d;
3878   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3879   const PetscInt *JJ;
3880   PetscScalar    *values;
3881   PetscErrorCode ierr;
3882   PetscBool      nooffprocentries;
3883 
3884   PetscFunctionBegin;
3885   if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3886 
3887   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3888   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3889   m      = B->rmap->n;
3890   cstart = B->cmap->rstart;
3891   cend   = B->cmap->rend;
3892   rstart = B->rmap->rstart;
3893 
3894   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3895 
3896 #if defined(PETSC_USE_DEBUG)
3897   for (i=0; i<m; i++) {
3898     nnz = Ii[i+1]- Ii[i];
3899     JJ  = J + Ii[i];
3900     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3901     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3902     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3903   }
3904 #endif
3905 
3906   for (i=0; i<m; i++) {
3907     nnz     = Ii[i+1]- Ii[i];
3908     JJ      = J + Ii[i];
3909     nnz_max = PetscMax(nnz_max,nnz);
3910     d       = 0;
3911     for (j=0; j<nnz; j++) {
3912       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3913     }
3914     d_nnz[i] = d;
3915     o_nnz[i] = nnz - d;
3916   }
3917   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3918   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3919 
3920   if (v) values = (PetscScalar*)v;
3921   else {
3922     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3923   }
3924 
3925   for (i=0; i<m; i++) {
3926     ii   = i + rstart;
3927     nnz  = Ii[i+1]- Ii[i];
3928     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3929   }
3930   nooffprocentries    = B->nooffprocentries;
3931   B->nooffprocentries = PETSC_TRUE;
3932   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3933   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3934   B->nooffprocentries = nooffprocentries;
3935 
3936   if (!v) {
3937     ierr = PetscFree(values);CHKERRQ(ierr);
3938   }
3939   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3940   PetscFunctionReturn(0);
3941 }
3942 
3943 /*@
3944    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3945    (the default parallel PETSc format).
3946 
3947    Collective on MPI_Comm
3948 
3949    Input Parameters:
3950 +  B - the matrix
3951 .  i - the indices into j for the start of each local row (starts with zero)
3952 .  j - the column indices for each local row (starts with zero)
3953 -  v - optional values in the matrix
3954 
3955    Level: developer
3956 
3957    Notes:
3958        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3959      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3960      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3961 
3962        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
3963 
3964        The format used for the sparse matrix input is equivalent to a
3965     row-major ordering, i.e. for the following matrix, the input data expected is
3966     as shown:
3967 
3968 $        1 0 0
3969 $        2 0 3     P0
3970 $       -------
3971 $        4 5 6     P1
3972 $
3973 $     Process0 [P0]: rows_owned=[0,1]
3974 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3975 $        j =  {0,0,2}  [size = 3]
3976 $        v =  {1,2,3}  [size = 3]
3977 $
3978 $     Process1 [P1]: rows_owned=[2]
3979 $        i =  {0,3}    [size = nrow+1  = 1+1]
3980 $        j =  {0,1,2}  [size = 3]
3981 $        v =  {4,5,6}  [size = 3]
3982 
3983 .keywords: matrix, aij, compressed row, sparse, parallel
3984 
3985 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3986           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3987 @*/
3988 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3989 {
3990   PetscErrorCode ierr;
3991 
3992   PetscFunctionBegin;
3993   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3994   PetscFunctionReturn(0);
3995 }
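
/*
   A minimal usage sketch for MatMPIAIJSetPreallocationCSR() (not part of the PETSc source):
   it assembles the small 3x3 example from the manual page above, assuming exactly two MPI
   ranks where rank 0 owns rows 0-1 and rank 1 owns row 2.  The function name
   ExampleSetPreallocationCSR is hypothetical.
*/
PetscErrorCode ExampleSetPreallocationCSR(MPI_Comm comm,Mat *A)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  /* local CSR arrays for each of the two ranks */
  const PetscInt    i0[] = {0,1,3},  i1[] = {0,3};
  const PetscInt    j0[] = {0,0,2},  j1[] = {0,1,2};
  const PetscScalar v0[] = {1,2,3},  v1[] = {4,5,6};

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,rank ? 1 : 2,PETSC_DECIDE,3,3);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
  /* copies the CSR arrays, preallocates, inserts the values and assembles */
  ierr = MatMPIAIJSetPreallocationCSR(*A,rank ? i1 : i0,rank ? j1 : j0,rank ? v1 : v0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}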
3996 
3997 /*@C
3998    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3999    (the default parallel PETSc format).  For good matrix assembly performance
4000    the user should preallocate the matrix storage by setting the parameters
4001    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4002    performance can be increased by more than a factor of 50.
4003 
4004    Collective on MPI_Comm
4005 
4006    Input Parameters:
4007 +  B - the matrix
4008 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4009            (same value is used for all local rows)
4010 .  d_nnz - array containing the number of nonzeros in the various rows of the
4011            DIAGONAL portion of the local submatrix (possibly different for each row)
4012            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4013            The size of this array is equal to the number of local rows, i.e 'm'.
4014            For matrices that will be factored, you must leave room for (and set)
4015            the diagonal entry even if it is zero.
4016 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4017            submatrix (same value is used for all local rows).
4018 -  o_nnz - array containing the number of nonzeros in the various rows of the
4019            OFF-DIAGONAL portion of the local submatrix (possibly different for
4020            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4021            structure. The size of this array is equal to the number
4022            of local rows, i.e 'm'.
4023 
4024    If the *_nnz parameter is given then the *_nz parameter is ignored
4025 
4026    The AIJ format (also called the Yale sparse matrix format or
4027    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4028    storage.  The stored row and column indices begin with zero.
4029    See Users-Manual: ch_mat for details.
4030 
4031    The parallel matrix is partitioned such that the first m0 rows belong to
4032    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4033    to process 2 etc., where m0,m1,m2... are the input parameter 'm'.
4034 
4035    The DIAGONAL portion of the local submatrix of a processor can be defined
4036    as the submatrix which is obtained by extracting the part corresponding to
4037    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4038    first row that belongs to the processor, r2 is the last row belonging to
4039    this processor, and c1-c2 is the range of indices of the local part of a
4040    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4041    common case of a square matrix, the row and column ranges are the same and
4042    the DIAGONAL part is also square. The remaining portion of the local
4043    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4044 
4045    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4046 
4047    You can call MatGetInfo() to get information on how effective the preallocation was;
4048    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4049    You can also run with the option -info and look for messages with the string
4050    malloc in them to see if additional memory allocation was needed.
4051 
4052    Example usage:
4053 
4054    Consider the following 8x8 matrix with 34 non-zero values, that is
4055    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4056    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4057    as follows:
4058 
4059 .vb
4060             1  2  0  |  0  3  0  |  0  4
4061     Proc0   0  5  6  |  7  0  0  |  8  0
4062             9  0 10  | 11  0  0  | 12  0
4063     -------------------------------------
4064            13  0 14  | 15 16 17  |  0  0
4065     Proc1   0 18  0  | 19 20 21  |  0  0
4066             0  0  0  | 22 23  0  | 24  0
4067     -------------------------------------
4068     Proc2  25 26 27  |  0  0 28  | 29  0
4069            30  0  0  | 31 32 33  |  0 34
4070 .ve
4071 
4072    This can be represented as a collection of submatrices as:
4073 
4074 .vb
4075       A B C
4076       D E F
4077       G H I
4078 .ve
4079 
4080    Where the submatrices A,B,C are owned by proc0, D,E,F are
4081    owned by proc1, G,H,I are owned by proc2.
4082 
4083    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4084    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4085    The 'M','N' parameters are 8,8, and have the same values on all procs.
4086 
4087    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4088    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4089    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4090    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4091    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4092    matrix, and [DF] as another SeqAIJ matrix.
4093 
4094    When d_nz, o_nz parameters are specified, d_nz storage elements are
4095    allocated for every row of the local diagonal submatrix, and o_nz
4096    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4097    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
4098    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4099    In this case, the values of d_nz,o_nz are:
4100 .vb
4101      proc0 : d_nz = 2, o_nz = 2
4102      proc1 : d_nz = 3, o_nz = 2
4103      proc2 : d_nz = 1, o_nz = 4
4104 .ve
4105    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4106    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4107    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4108    34 values.
4109 
4110    When d_nnz, o_nnz parameters are specified, the storage is specified
4111    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4112    In the above case the values for d_nnz,o_nnz are:
4113 .vb
4114      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4115      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4116      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4117 .ve
4118    Here the space allocated is the sum of all the above values, i.e. 34, and
4119    hence pre-allocation is perfect.
4120 
4121    Level: intermediate
4122 
4123 .keywords: matrix, aij, compressed row, sparse, parallel
4124 
4125 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4126           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4127 @*/
4128 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4129 {
4130   PetscErrorCode ierr;
4131 
4132   PetscFunctionBegin;
4133   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4134   PetscValidType(B,1);
4135   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4136   PetscFunctionReturn(0);
4137 }
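
/*
   A minimal sketch (not part of the PETSc source) of per-row preallocation as described in the
   manual page above.  The d_nnz/o_nnz values are the proc0 numbers from the 8x8 example; the
   other processes would pass their own arrays.  The function name ExamplePreallocateRows is
   hypothetical.
*/
PetscErrorCode ExamplePreallocateRows(Mat B)
{
  PetscErrorCode ierr;
  const PetscInt d_nnz[] = {2,2,2};   /* nonzeros per local row in the DIAGONAL block     */
  const PetscInt o_nnz[] = {2,2,2};   /* nonzeros per local row in the OFF-DIAGONAL block */

  PetscFunctionBegin;
  /* the scalar d_nz/o_nz arguments are ignored when the arrays are provided */
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}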
4138 
4139 /*@
4140      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4141          CSR format the local rows.
4142 
4143    Collective on MPI_Comm
4144 
4145    Input Parameters:
4146 +  comm - MPI communicator
4147 .  m - number of local rows (Cannot be PETSC_DECIDE)
4148 .  n - This value should be the same as the local size used in creating the
4149        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4150        calculated if N is given) For square matrices n is almost always m.
4151 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4152 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4153 .   i - row indices
4154 .   j - column indices
4155 -   a - matrix values
4156 
4157    Output Parameter:
4158 .   mat - the matrix
4159 
4160    Level: intermediate
4161 
4162    Notes:
4163        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4164      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4165      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4166 
4167        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
4168 
4169        The format used for the sparse matrix input is equivalent to a
4170     row-major ordering, i.e. for the following matrix, the input data expected is
4171     as shown:
4172 
4173 $        1 0 0
4174 $        2 0 3     P0
4175 $       -------
4176 $        4 5 6     P1
4177 $
4178 $     Process0 [P0]: rows_owned=[0,1]
4179 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4180 $        j =  {0,0,2}  [size = 3]
4181 $        v =  {1,2,3}  [size = 3]
4182 $
4183 $     Process1 [P1]: rows_owned=[2]
4184 $        i =  {0,3}    [size = nrow+1  = 1+1]
4185 $        j =  {0,1,2}  [size = 3]
4186 $        v =  {4,5,6}  [size = 3]
4187 
4188 .keywords: matrix, aij, compressed row, sparse, parallel
4189 
4190 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4191           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4192 @*/
4193 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4194 {
4195   PetscErrorCode ierr;
4196 
4197   PetscFunctionBegin;
4198   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4199   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4200   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4201   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4202   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4203   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4204   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4205   PetscFunctionReturn(0);
4206 }
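
/*
   A minimal sketch (not part of the PETSc source): the rank owning rows 0-1 of the 3x3 example
   above creates the parallel matrix directly from its local CSR arrays, which are copied by
   MatCreateMPIAIJWithArrays().  The other rank would make the matching call with its own arrays.
   The function name ExampleCreateFromArrays is hypothetical.
*/
PetscErrorCode ExampleCreateFromArrays(MPI_Comm comm,Mat *A)
{
  PetscErrorCode    ierr;
  const PetscInt    i[] = {0,1,3};    /* row offsets for the two local rows */
  const PetscInt    j[] = {0,0,2};    /* global column indices              */
  const PetscScalar a[] = {1,2,3};    /* values                             */

  PetscFunctionBegin;
  ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,a,A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}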
4207 
4208 /*@C
4209    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4210    (the default parallel PETSc format).  For good matrix assembly performance
4211    the user should preallocate the matrix storage by setting the parameters
4212    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4213    performance can be increased by more than a factor of 50.
4214 
4215    Collective on MPI_Comm
4216 
4217    Input Parameters:
4218 +  comm - MPI communicator
4219 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4220            This value should be the same as the local size used in creating the
4221            y vector for the matrix-vector product y = Ax.
4222 .  n - This value should be the same as the local size used in creating the
4223        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4224        calculated if N is given) For square matrices n is almost always m.
4225 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4226 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4227 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4228            (same value is used for all local rows)
4229 .  d_nnz - array containing the number of nonzeros in the various rows of the
4230            DIAGONAL portion of the local submatrix (possibly different for each row)
4231            or NULL, if d_nz is used to specify the nonzero structure.
4232            The size of this array is equal to the number of local rows, i.e 'm'.
4233 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4234            submatrix (same value is used for all local rows).
4235 -  o_nnz - array containing the number of nonzeros in the various rows of the
4236            OFF-DIAGONAL portion of the local submatrix (possibly different for
4237            each row) or NULL, if o_nz is used to specify the nonzero
4238            structure. The size of this array is equal to the number
4239            of local rows, i.e 'm'.
4240 
4241    Output Parameter:
4242 .  A - the matrix
4243 
4244    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4245    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4246    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4247 
4248    Notes:
4249    If the *_nnz parameter is given then the *_nz parameter is ignored
4250 
4251    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4252    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4253    storage requirements for this matrix.
4254 
4255    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4256    processor than it must be used on all processors that share the object for
4257    that argument.
4258 
4259    The user MUST specify either the local or global matrix dimensions
4260    (possibly both).
4261 
4262    The parallel matrix is partitioned across processors such that the
4263    first m0 rows belong to process 0, the next m1 rows belong to
4264    process 1, the next m2 rows belong to process 2 etc.. where
4265    m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
4266    values corresponding to an [m x N] submatrix.
4267 
4268    The columns are logically partitioned with the n0 columns belonging
4269    to 0th partition, the next n1 columns belonging to the next
4270    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4271 
4272    The DIAGONAL portion of the local submatrix on any given processor
4273    is the submatrix corresponding to the rows and columns m,n
4274    corresponding to the given processor, i.e. the diagonal matrix on
4275    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4276    etc. The remaining portion of the local submatrix [m x (N-n)]
4277    constitutes the OFF-DIAGONAL portion. The example below better
4278    illustrates this concept.
4279 
4280    For a square global matrix we define each processor's diagonal portion
4281    to be its local rows and the corresponding columns (a square submatrix);
4282    each processor's off-diagonal portion encompasses the remainder of the
4283    local matrix (a rectangular submatrix).
4284 
4285    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4286 
4287    When calling this routine with a single process communicator, a matrix of
4288    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4289    type of communicator, use the construction mechanism
4290 .vb
4291      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4292 .ve
4298 
4299    By default, this format uses inodes (identical nodes) when possible.
4300    We search for consecutive rows with the same nonzero structure, thereby
4301    reusing matrix information to achieve increased efficiency.
4302 
4303    Options Database Keys:
4304 +  -mat_no_inode  - Do not use inodes
4305 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4306 
4307 
4308 
4309    Example usage:
4310 
4311    Consider the following 8x8 matrix with 34 non-zero values, that is
4312    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4313    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4314    as follows
4315 
4316 .vb
4317             1  2  0  |  0  3  0  |  0  4
4318     Proc0   0  5  6  |  7  0  0  |  8  0
4319             9  0 10  | 11  0  0  | 12  0
4320     -------------------------------------
4321            13  0 14  | 15 16 17  |  0  0
4322     Proc1   0 18  0  | 19 20 21  |  0  0
4323             0  0  0  | 22 23  0  | 24  0
4324     -------------------------------------
4325     Proc2  25 26 27  |  0  0 28  | 29  0
4326            30  0  0  | 31 32 33  |  0 34
4327 .ve
4328 
4329    This can be represented as a collection of submatrices as
4330 
4331 .vb
4332       A B C
4333       D E F
4334       G H I
4335 .ve
4336 
4337    Where the submatrices A,B,C are owned by proc0, D,E,F are
4338    owned by proc1, G,H,I are owned by proc2.
4339 
4340    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4341    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4342    The 'M','N' parameters are 8,8, and have the same values on all procs.
4343 
4344    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4345    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4346    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4347    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4348    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4349    matrix, and [DF] as another SeqAIJ matrix.
4350 
4351    When d_nz, o_nz parameters are specified, d_nz storage elements are
4352    allocated for every row of the local diagonal submatrix, and o_nz
4353    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4354    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
4355    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4356    In this case, the values of d_nz,o_nz are
4357 .vb
4358      proc0 : d_nz = 2, o_nz = 2
4359      proc1 : d_nz = 3, o_nz = 2
4360      proc2 : d_nz = 1, o_nz = 4
4361 .ve
4362    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4363    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4364    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4365    34 values.
4366 
4367    When d_nnz, o_nnz parameters are specified, the storage is specified
4368    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4369    In the above case the values for d_nnz,o_nnz are
4370 .vb
4371      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4372      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4373      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4374 .ve
4375    Here the space allocated is the sum of all the above values, i.e. 34, and
4376    hence pre-allocation is perfect.
4377 
4378    Level: intermediate
4379 
4380 .keywords: matrix, aij, compressed row, sparse, parallel
4381 
4382 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4383           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4384 @*/
4385 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4386 {
4387   PetscErrorCode ierr;
4388   PetscMPIInt    size;
4389 
4390   PetscFunctionBegin;
4391   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4392   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4393   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4394   if (size > 1) {
4395     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4396     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4397   } else {
4398     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4399     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4400   }
4401   PetscFunctionReturn(0);
4402 }
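
/*
   A minimal sketch (not part of the PETSc source) of the recommended MatCreate()/MatSetType()/
   MatXXXXSetPreallocation() paradigm mentioned above.  Calling both preallocation routines lets
   the same code run on one process (SeqAIJ) or many (MPIAIJ); nz_diag and nz_off are hypothetical
   per-row estimates.
*/
PetscErrorCode ExampleCreateAIJParadigm(MPI_Comm comm,PetscInt M,PetscInt nz_diag,PetscInt nz_off,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,PETSC_DECIDE,PETSC_DECIDE,M,M);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATAIJ);CHKERRQ(ierr);
  ierr = MatSetFromOptions(*A);CHKERRQ(ierr);
  /* whichever call matches the actual type takes effect; the other is a no-op */
  ierr = MatSeqAIJSetPreallocation(*A,nz_diag,NULL);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*A,nz_diag,NULL,nz_off,NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}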
4403 
4404 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4405 {
4406   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4407   PetscBool      flg;
4408   PetscErrorCode ierr;
4409 
4410   PetscFunctionBegin;
4411   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
4412   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4413   if (Ad)     *Ad     = a->A;
4414   if (Ao)     *Ao     = a->B;
4415   if (colmap) *colmap = a->garray;
4416   PetscFunctionReturn(0);
4417 }
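
/*
   A minimal sketch (not part of the PETSc source) of MatMPIAIJGetSeqAIJ(): it exposes the two
   SeqAIJ blocks stored on this process and the garray that maps local off-diagonal columns to
   global columns.  The function name ExampleInspectLocalBlocks is hypothetical.
*/
PetscErrorCode ExampleInspectLocalBlocks(Mat A)
{
  PetscErrorCode ierr;
  Mat            Ad,Ao;
  const PetscInt *colmap;
  PetscInt       m,nd,no;

  PetscFunctionBegin;
  ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
  ierr = MatGetLocalSize(Ad,&m,&nd);CHKERRQ(ierr);  /* diagonal block */
  ierr = MatGetLocalSize(Ao,&m,&no);CHKERRQ(ierr);  /* off-diagonal block */
  ierr = PetscPrintf(PETSC_COMM_SELF,"diag block %D x %D, off-diag block %D x %D\n",m,nd,m,no);CHKERRQ(ierr);
  if (no) {ierr = PetscPrintf(PETSC_COMM_SELF,"first off-diag column is global column %D\n",colmap[0]);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}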
4418 
4419 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4420 {
4421   PetscErrorCode ierr;
4422   PetscInt       m,N,i,rstart,nnz,Ii;
4423   PetscInt       *indx;
4424   PetscScalar    *values;
4425 
4426   PetscFunctionBegin;
4427   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4428   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4429     PetscInt       *dnz,*onz,sum,bs,cbs;
4430 
4431     if (n == PETSC_DECIDE) {
4432       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4433     }
4434     /* Check sum(n) = N */
4435     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4436     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4437 
4438     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4439     rstart -= m;
4440 
4441     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4442     for (i=0; i<m; i++) {
4443       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4444       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4445       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4446     }
4447 
4448     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4449     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4450     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4451     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4452     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4453     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4454     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4455     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4456   }
4457 
4458   /* numeric phase */
4459   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4460   for (i=0; i<m; i++) {
4461     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4462     Ii   = i + rstart;
4463     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4464     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4465   }
4466   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4467   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4468   PetscFunctionReturn(0);
4469 }
4470 
4471 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4472 {
4473   PetscErrorCode    ierr;
4474   PetscMPIInt       rank;
4475   PetscInt          m,N,i,rstart,nnz;
4476   size_t            len;
4477   const PetscInt    *indx;
4478   PetscViewer       out;
4479   char              *name;
4480   Mat               B;
4481   const PetscScalar *values;
4482 
4483   PetscFunctionBegin;
4484   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4485   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4486   /* Should this be the type of the diagonal block of A? */
4487   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4488   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4489   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4490   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4491   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4492   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4493   for (i=0; i<m; i++) {
4494     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4495     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4496     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4497   }
4498   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4499   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4500 
4501   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4502   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4503   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4504   sprintf(name,"%s.%d",outfile,rank);
4505   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4506   ierr = PetscFree(name);CHKERRQ(ierr);
4507   ierr = MatView(B,out);CHKERRQ(ierr);
4508   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4509   ierr = MatDestroy(&B);CHKERRQ(ierr);
4510   PetscFunctionReturn(0);
4511 }
4512 
4513 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4514 {
4515   PetscErrorCode      ierr;
4516   Mat_Merge_SeqsToMPI *merge;
4517   PetscContainer      container;
4518 
4519   PetscFunctionBegin;
4520   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4521   if (container) {
4522     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4523     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4524     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4525     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4526     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4527     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4528     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4529     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4530     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4531     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4532     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4533     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4534     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4535     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4536     ierr = PetscFree(merge);CHKERRQ(ierr);
4537     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4538   }
4539   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4540   PetscFunctionReturn(0);
4541 }
4542 
4543 #include <../src/mat/utils/freespace.h>
4544 #include <petscbt.h>
4545 
4546 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4547 {
4548   PetscErrorCode      ierr;
4549   MPI_Comm            comm;
4550   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4551   PetscMPIInt         size,rank,taga,*len_s;
4552   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4553   PetscInt            proc,m;
4554   PetscInt            **buf_ri,**buf_rj;
4555   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4556   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4557   MPI_Request         *s_waits,*r_waits;
4558   MPI_Status          *status;
4559   MatScalar           *aa=a->a;
4560   MatScalar           **abuf_r,*ba_i;
4561   Mat_Merge_SeqsToMPI *merge;
4562   PetscContainer      container;
4563 
4564   PetscFunctionBegin;
4565   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4566   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4567 
4568   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4569   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4570 
4571   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4572   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4573 
4574   bi     = merge->bi;
4575   bj     = merge->bj;
4576   buf_ri = merge->buf_ri;
4577   buf_rj = merge->buf_rj;
4578 
4579   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4580   owners = merge->rowmap->range;
4581   len_s  = merge->len_s;
4582 
4583   /* send and recv matrix values */
4584   /*-----------------------------*/
4585   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4586   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4587 
4588   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4589   for (proc=0,k=0; proc<size; proc++) {
4590     if (!len_s[proc]) continue;
4591     i    = owners[proc];
4592     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4593     k++;
4594   }
4595 
4596   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4597   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4598   ierr = PetscFree(status);CHKERRQ(ierr);
4599 
4600   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4601   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4602 
4603   /* insert mat values of mpimat */
4604   /*----------------------------*/
4605   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4606   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4607 
4608   for (k=0; k<merge->nrecv; k++) {
4609     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4610     nrows       = *(buf_ri_k[k]);
4611     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4612     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4613   }
4614 
4615   /* set values of ba */
4616   m = merge->rowmap->n;
4617   for (i=0; i<m; i++) {
4618     arow = owners[rank] + i;
4619     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4620     bnzi = bi[i+1] - bi[i];
4621     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4622 
4623     /* add local non-zero vals of this proc's seqmat into ba */
4624     anzi   = ai[arow+1] - ai[arow];
4625     aj     = a->j + ai[arow];
4626     aa     = a->a + ai[arow];
4627     nextaj = 0;
4628     for (j=0; nextaj<anzi; j++) {
4629       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4630         ba_i[j] += aa[nextaj++];
4631       }
4632     }
4633 
4634     /* add received vals into ba */
4635     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4636       /* i-th row */
4637       if (i == *nextrow[k]) {
4638         anzi   = *(nextai[k]+1) - *nextai[k];
4639         aj     = buf_rj[k] + *(nextai[k]);
4640         aa     = abuf_r[k] + *(nextai[k]);
4641         nextaj = 0;
4642         for (j=0; nextaj<anzi; j++) {
4643           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4644             ba_i[j] += aa[nextaj++];
4645           }
4646         }
4647         nextrow[k]++; nextai[k]++;
4648       }
4649     }
4650     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4651   }
4652   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4653   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4654 
4655   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4656   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4657   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4658   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4659   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4660   PetscFunctionReturn(0);
4661 }
4662 
4663 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4664 {
4665   PetscErrorCode      ierr;
4666   Mat                 B_mpi;
4667   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4668   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4669   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4670   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4671   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4672   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4673   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4674   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4675   MPI_Status          *status;
4676   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4677   PetscBT             lnkbt;
4678   Mat_Merge_SeqsToMPI *merge;
4679   PetscContainer      container;
4680 
4681   PetscFunctionBegin;
4682   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4683 
4684   /* make sure it is a PETSc comm */
4685   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4686   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4687   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4688 
4689   ierr = PetscNew(&merge);CHKERRQ(ierr);
4690   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4691 
4692   /* determine row ownership */
4693   /*---------------------------------------------------------*/
4694   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4695   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4696   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4697   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4698   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4699   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4700   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4701 
4702   m      = merge->rowmap->n;
4703   owners = merge->rowmap->range;
4704 
4705   /* determine the number of messages to send, their lengths */
4706   /*---------------------------------------------------------*/
4707   len_s = merge->len_s;
4708 
4709   len          = 0; /* length of buf_si[] */
4710   merge->nsend = 0;
4711   for (proc=0; proc<size; proc++) {
4712     len_si[proc] = 0;
4713     if (proc == rank) {
4714       len_s[proc] = 0;
4715     } else {
4716       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4717       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4718     }
4719     if (len_s[proc]) {
4720       merge->nsend++;
4721       nrows = 0;
4722       for (i=owners[proc]; i<owners[proc+1]; i++) {
4723         if (ai[i+1] > ai[i]) nrows++;
4724       }
4725       len_si[proc] = 2*(nrows+1);
4726       len         += len_si[proc];
4727     }
4728   }
4729 
4730   /* determine the number and length of messages to receive for ij-structure */
4731   /*-------------------------------------------------------------------------*/
4732   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4733   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4734 
4735   /* post the Irecv of j-structure */
4736   /*-------------------------------*/
4737   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4738   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4739 
4740   /* post the Isend of j-structure */
4741   /*--------------------------------*/
4742   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4743 
4744   for (proc=0, k=0; proc<size; proc++) {
4745     if (!len_s[proc]) continue;
4746     i    = owners[proc];
4747     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4748     k++;
4749   }
4750 
4751   /* receives and sends of j-structure are complete */
4752   /*------------------------------------------------*/
4753   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4754   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4755 
4756   /* send and recv i-structure */
4757   /*---------------------------*/
4758   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4759   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4760 
4761   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4762   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4763   for (proc=0,k=0; proc<size; proc++) {
4764     if (!len_s[proc]) continue;
4765     /* form outgoing message for i-structure:
4766          buf_si[0]:                 nrows to be sent
4767                [1:nrows]:           row index (local to the destination process)
4768                [nrows+1:2*nrows+1]: i-structure index
4769     */
4770     /*-------------------------------------------*/
4771     nrows       = len_si[proc]/2 - 1;
4772     buf_si_i    = buf_si + nrows+1;
4773     buf_si[0]   = nrows;
4774     buf_si_i[0] = 0;
4775     nrows       = 0;
4776     for (i=owners[proc]; i<owners[proc+1]; i++) {
4777       anzi = ai[i+1] - ai[i];
4778       if (anzi) {
4779         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4780         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4781         nrows++;
4782       }
4783     }
4784     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4785     k++;
4786     buf_si += len_si[proc];
4787   }
4788 
4789   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4790   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4791 
4792   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4793   for (i=0; i<merge->nrecv; i++) {
4794     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4795   }
4796 
4797   ierr = PetscFree(len_si);CHKERRQ(ierr);
4798   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4799   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4800   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4801   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4802   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4803   ierr = PetscFree(status);CHKERRQ(ierr);
4804 
4805   /* compute a local seq matrix in each processor */
4806   /*----------------------------------------------*/
4807   /* allocate bi array and free space for accumulating nonzero column info */
4808   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4809   bi[0] = 0;
4810 
4811   /* create and initialize a linked list */
4812   nlnk = N+1;
4813   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4814 
4815   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4816   len  = ai[owners[rank+1]] - ai[owners[rank]];
4817   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4818 
4819   current_space = free_space;
4820 
4821   /* determine symbolic info for each local row */
4822   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4823 
4824   for (k=0; k<merge->nrecv; k++) {
4825     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4826     nrows       = *buf_ri_k[k];
4827     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4828     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4829   }
4830 
4831   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4832   len  = 0;
4833   for (i=0; i<m; i++) {
4834     bnzi = 0;
4835     /* add local non-zero cols of this proc's seqmat into lnk */
4836     arow  = owners[rank] + i;
4837     anzi  = ai[arow+1] - ai[arow];
4838     aj    = a->j + ai[arow];
4839     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4840     bnzi += nlnk;
4841     /* add received col data into lnk */
4842     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4843       if (i == *nextrow[k]) { /* i-th row */
4844         anzi  = *(nextai[k]+1) - *nextai[k];
4845         aj    = buf_rj[k] + *nextai[k];
4846         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4847         bnzi += nlnk;
4848         nextrow[k]++; nextai[k]++;
4849       }
4850     }
4851     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4852 
4853     /* if free space is not available, make more free space */
4854     if (current_space->local_remaining<bnzi) {
4855       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4856       nspacedouble++;
4857     }
4858     /* copy data into free space, then initialize lnk */
4859     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4860     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4861 
4862     current_space->array           += bnzi;
4863     current_space->local_used      += bnzi;
4864     current_space->local_remaining -= bnzi;
4865 
4866     bi[i+1] = bi[i] + bnzi;
4867   }
4868 
4869   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4870 
4871   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4872   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4873   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4874 
4875   /* create symbolic parallel matrix B_mpi */
4876   /*---------------------------------------*/
4877   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4878   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4879   if (n==PETSC_DECIDE) {
4880     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4881   } else {
4882     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4883   }
4884   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4885   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4886   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4887   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4888   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4889 
4890   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4891   B_mpi->assembled    = PETSC_FALSE;
4892   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4893   merge->bi           = bi;
4894   merge->bj           = bj;
4895   merge->buf_ri       = buf_ri;
4896   merge->buf_rj       = buf_rj;
4897   merge->coi          = NULL;
4898   merge->coj          = NULL;
4899   merge->owners_co    = NULL;
4900 
4901   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4902 
4903   /* attach the supporting struct to B_mpi for reuse */
4904   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4905   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4906   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4907   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4908   *mpimat = B_mpi;
4909 
4910   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4911   PetscFunctionReturn(0);
4912 }
4913 
4914 /*@C
4915       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4916                  matrices from each processor
4917 
4918     Collective on MPI_Comm
4919 
4920    Input Parameters:
4921 +    comm - the communicator the parallel matrix will live on
4922 .    seqmat - the input sequential matrix on this process
4923 .    m - number of local rows (or PETSC_DECIDE)
4924 .    n - number of local columns (or PETSC_DECIDE)
4925 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4926 
4927    Output Parameter:
4928 .    mpimat - the parallel matrix generated
4929 
4930     Level: advanced
4931 
4932    Notes:
4933      The dimensions of the sequential matrix in each processor MUST be the same.
4934      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4935      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4936 @*/
4937 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4938 {
4939   PetscErrorCode ierr;
4940   PetscMPIInt    size;
4941 
4942   PetscFunctionBegin;
4943   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4944   if (size == 1) {
4945     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4946     if (scall == MAT_INITIAL_MATRIX) {
4947       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4948     } else {
4949       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4950     }
4951     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4952     PetscFunctionReturn(0);
4953   }
4954   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4955   if (scall == MAT_INITIAL_MATRIX) {
4956     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4957   }
4958   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4959   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4960   PetscFunctionReturn(0);
4961 }
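
/*
   A minimal sketch (not part of the PETSc source) of MatCreateMPIAIJSumSeqAIJ(): every process
   contributes a sequential matrix with identical global dimensions and the entries are summed
   into one parallel matrix; a second call with MAT_REUSE_MATRIX reuses the symbolic information
   after the sequential values change (same nonzero pattern assumed).  The function name
   ExampleSumSeqMats is hypothetical.
*/
PetscErrorCode ExampleSumSeqMats(MPI_Comm comm,Mat seqmat,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
  /* ... modify the entries of seqmat, keeping its nonzero pattern ... */
  ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}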
4962 
4963 /*@
4964      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4965           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4966           with MatGetSize().
4967 
4968     Not Collective
4969 
4970    Input Parameters:
4971 +    A - the matrix
4972 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4973 
4974    Output Parameter:
4975 .    A_loc - the local sequential matrix generated
4976 
4977     Level: developer
4978 
4979 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4980 
4981 @*/
4982 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4983 {
4984   PetscErrorCode ierr;
4985   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4986   Mat_SeqAIJ     *mat,*a,*b;
4987   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4988   MatScalar      *aa,*ba,*cam;
4989   PetscScalar    *ca;
4990   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4991   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4992   PetscBool      match;
4993   MPI_Comm       comm;
4994   PetscMPIInt    size;
4995 
4996   PetscFunctionBegin;
4997   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4998   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4999   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5000   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5001   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
5002 
5003   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5004   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5005   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5006   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5007   aa = a->a; ba = b->a;
5008   if (scall == MAT_INITIAL_MATRIX) {
5009     if (size == 1) {
5010       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
5011       PetscFunctionReturn(0);
5012     }
5013 
5014     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5015     ci[0] = 0;
5016     for (i=0; i<am; i++) {
5017       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5018     }
5019     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5020     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5021     k    = 0;
5022     for (i=0; i<am; i++) {
5023       ncols_o = bi[i+1] - bi[i];
5024       ncols_d = ai[i+1] - ai[i];
5025       /* off-diagonal portion of A */
5026       for (jo=0; jo<ncols_o; jo++) {
5027         col = cmap[*bj];
5028         if (col >= cstart) break;
5029         cj[k]   = col; bj++;
5030         ca[k++] = *ba++;
5031       }
5032       /* diagonal portion of A */
5033       for (j=0; j<ncols_d; j++) {
5034         cj[k]   = cstart + *aj++;
5035         ca[k++] = *aa++;
5036       }
5037       /* off-diagonal portion of A */
5038       for (j=jo; j<ncols_o; j++) {
5039         cj[k]   = cmap[*bj++];
5040         ca[k++] = *ba++;
5041       }
5042     }
5043     /* put together the new matrix */
5044     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5045     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5046     /* Since these are PETSc arrays, change flags to free them as necessary. */
5047     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5048     mat->free_a  = PETSC_TRUE;
5049     mat->free_ij = PETSC_TRUE;
5050     mat->nonew   = 0;
5051   } else if (scall == MAT_REUSE_MATRIX) {
5052     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5053     ci = mat->i; cj = mat->j; cam = mat->a;
5054     for (i=0; i<am; i++) {
5055       /* off-diagonal portion of A */
5056       ncols_o = bi[i+1] - bi[i];
5057       for (jo=0; jo<ncols_o; jo++) {
5058         col = cmap[*bj];
5059         if (col >= cstart) break;
5060         *cam++ = *ba++; bj++;
5061       }
5062       /* diagonal portion of A */
5063       ncols_d = ai[i+1] - ai[i];
5064       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5065       /* off-diagonal portion of A, columns to the right of the diagonal block */
5066       for (j=jo; j<ncols_o; j++) {
5067         *cam++ = *ba++; bj++;
5068       }
5069     }
5070   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5071   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5072   PetscFunctionReturn(0);
5073 }
5074 
5075 /*@C
5076      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all of its local rows and only the columns that contain nonzeros
5077 
5078     Not Collective
5079 
5080    Input Parameters:
5081 +    A - the matrix
5082 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5083 -    row, col - index sets of rows and columns to extract (or NULL)
5084 
5085    Output Parameter:
5086 .    A_loc - the local sequential matrix generated
5087 
5088     Level: developer
5089 
5090 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5091 
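   Example usage (a minimal sketch; Aloc is a hypothetical variable name and error checking is omitted):
.vb
   Mat Aloc;
   MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&Aloc);
   ... use Aloc, whose columns are the condensed nonzero columns of the local rows of A ...
   MatDestroy(&Aloc);
.ve
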
5092 @*/
5093 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5094 {
5095   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5096   PetscErrorCode ierr;
5097   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5098   IS             isrowa,iscola;
5099   Mat            *aloc;
5100   PetscBool      match;
5101 
5102   PetscFunctionBegin;
5103   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5104   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5105   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5106   if (!row) {
5107     start = A->rmap->rstart; end = A->rmap->rend;
5108     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5109   } else {
5110     isrowa = *row;
5111   }
5112   if (!col) {
5113     start = A->cmap->rstart;
5114     cmap  = a->garray;
5115     nzA   = a->A->cmap->n;
5116     nzB   = a->B->cmap->n;
5117     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5118     ncols = 0;
5119     for (i=0; i<nzB; i++) {
5120       if (cmap[i] < start) idx[ncols++] = cmap[i];
5121       else break;
5122     }
5123     imark = i;
5124     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5125     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5126     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5127   } else {
5128     iscola = *col;
5129   }
5130   if (scall != MAT_INITIAL_MATRIX) {
5131     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5132     aloc[0] = *A_loc;
5133   }
5134   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5135   if (!col) { /* attach global id of condensed columns */
5136     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5137   }
5138   *A_loc = aloc[0];
5139   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5140   if (!row) {
5141     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5142   }
5143   if (!col) {
5144     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5145   }
5146   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5147   PetscFunctionReturn(0);
5148 }
5149 
5150 /*@C
5151     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
5152 
5153     Collective on Mat
5154 
5155    Input Parameters:
5156 +    A,B - the matrices in mpiaij format
5157 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5158 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5159 
5160    Output Parameter:
5161 +    rowb, colb - index sets of rows and columns of B to extract
5162 -    B_seq - the sequential matrix generated
5163 
5164     Level: developer
5165 
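   Example usage (a minimal sketch; Bseq, rowb, and colb are hypothetical variable names and error checking is omitted):
.vb
   Mat Bseq;
   IS  rowb = NULL,colb = NULL;
   MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);
   ... after the numerical values of B change, refresh Bseq with ...
   MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&Bseq);
   ISDestroy(&rowb);
   ISDestroy(&colb);
   MatDestroy(&Bseq);
.ve
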
5166 @*/
5167 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5168 {
5169   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5170   PetscErrorCode ierr;
5171   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5172   IS             isrowb,iscolb;
5173   Mat            *bseq=NULL;
5174 
5175   PetscFunctionBegin;
5176   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5177     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5178   }
5179   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5180 
5181   if (scall == MAT_INITIAL_MATRIX) {
5182     start = A->cmap->rstart;
5183     cmap  = a->garray;
5184     nzA   = a->A->cmap->n;
5185     nzB   = a->B->cmap->n;
5186     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5187     ncols = 0;
5188     for (i=0; i<nzB; i++) {  /* row < local row index */
5189       if (cmap[i] < start) idx[ncols++] = cmap[i];
5190       else break;
5191     }
5192     imark = i;
5193     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5194     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5195     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5196     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5197   } else {
5198     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5199     isrowb  = *rowb; iscolb = *colb;
5200     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5201     bseq[0] = *B_seq;
5202   }
5203   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5204   *B_seq = bseq[0];
5205   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5206   if (!rowb) {
5207     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5208   } else {
5209     *rowb = isrowb;
5210   }
5211   if (!colb) {
5212     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5213   } else {
5214     *colb = iscolb;
5215   }
5216   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5217   PetscFunctionReturn(0);
5218 }
5219 
5220 /*
5221     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5222     of the OFF-DIAGONAL portion of local A
5223 
5224     Collective on Mat
5225 
5226    Input Parameters:
5227 +    A,B - the matrices in mpiaij format
5228 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5229 
5230    Output Parameter:
5231 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5232 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5233 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5234 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5235 
5236     Level: developer
5237 
5238 */
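/*
   Example usage from a Mat-Mat product kernel (a hypothetical sketch; the variable names are placeholders and error
   checking is omitted):

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat       B_oth;
     MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);
     ... later, after the numerical values of B change ...
     MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);
     ... cleanup, matching the allocations made by the MAT_INITIAL_MATRIX call ...
     PetscFree2(startsj_s,startsj_r);
     PetscFree(bufa);
     MatDestroy(&B_oth);
*/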
5239 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5240 {
5241   VecScatter_MPI_General *gen_to,*gen_from;
5242   PetscErrorCode         ierr;
5243   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5244   Mat_SeqAIJ             *b_oth;
5245   VecScatter             ctx;
5246   MPI_Comm               comm;
5247   PetscMPIInt            *rprocs,*sprocs,tag,rank;
5248   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5249   PetscInt               *rvalues,*svalues,*cols,sbs,rbs;
5250   PetscScalar              *b_otha,*bufa,*bufA,*vals;
5251   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5252   MPI_Request            *rwaits = NULL,*swaits = NULL;
5253   MPI_Status             *sstatus,rstatus;
5254   PetscMPIInt            jj,size;
5255   VecScatterType         type;
5256   PetscBool              mpi1;
5257 
5258   PetscFunctionBegin;
5259   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5260   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5261 
5262   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5263     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5264   }
5265   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5266   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5267 
5268   if (size == 1) {
5269     startsj_s = NULL;
5270     bufa_ptr  = NULL;
5271     *B_oth    = NULL;
5272     PetscFunctionReturn(0);
5273   }
5274 
5275   ctx = a->Mvctx;
5276   ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr);
5277   ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr);
5278   if (!mpi1) {
5279     /* a->Mvctx is not of type MPI1, the only scatter type supported by these Mat-Mat routines,
5280      thus create a->Mvctx_mpi1 */
5281     if (!a->Mvctx_mpi1) {
5282       a->Mvctx_mpi1_flg = PETSC_TRUE;
5283       ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr);
5284     }
5285     ctx = a->Mvctx_mpi1;
5286   }
5287   tag = ((PetscObject)ctx)->tag;
5288 
5289   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5290   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5291   nrecvs   = gen_from->n;
5292   nsends   = gen_to->n;
5293 
5294   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5295   srow    = gen_to->indices;    /* local row index to be sent */
5296   sstarts = gen_to->starts;
5297   sprocs  = gen_to->procs;
5298   sstatus = gen_to->sstatus;
5299   sbs     = gen_to->bs;
5300   rstarts = gen_from->starts;
5301   rprocs  = gen_from->procs;
5302   rbs     = gen_from->bs;
5303 
5304   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5305   if (scall == MAT_INITIAL_MATRIX) {
5306     /* i-array */
5307     /*---------*/
5308     /*  post receives */
5309     ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
5310     for (i=0; i<nrecvs; i++) {
5311       rowlen = rvalues + rstarts[i]*rbs;
5312       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5313       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5314     }
5315 
5316     /* pack the outgoing message */
5317     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5318 
5319     sstartsj[0] = 0;
5320     rstartsj[0] = 0;
5321     len         = 0; /* total length of j or a array to be sent */
5322     k           = 0;
5323     ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
5324     for (i=0; i<nsends; i++) {
5325       rowlen = svalues + sstarts[i]*sbs;
5326       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5327       for (j=0; j<nrows; j++) {
5328         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5329         for (l=0; l<sbs; l++) {
5330           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5331 
5332           rowlen[j*sbs+l] = ncols;
5333 
5334           len += ncols;
5335           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5336         }
5337         k++;
5338       }
5339       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5340 
5341       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5342     }
5343     /* recvs and sends of i-array are completed */
5344     i = nrecvs;
5345     while (i--) {
5346       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5347     }
5348     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5349     ierr = PetscFree(svalues);CHKERRQ(ierr);
5350 
5351     /* allocate buffers for sending j and a arrays */
5352     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5353     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5354 
5355     /* create i-array of B_oth */
5356     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5357 
5358     b_othi[0] = 0;
5359     len       = 0; /* total length of j or a array to be received */
5360     k         = 0;
5361     for (i=0; i<nrecvs; i++) {
5362       rowlen = rvalues + rstarts[i]*rbs;
5363       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5364       for (j=0; j<nrows; j++) {
5365         b_othi[k+1] = b_othi[k] + rowlen[j];
5366         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5367         k++;
5368       }
5369       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5370     }
5371     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5372 
5373     /* allocate space for j and a arrays of B_oth */
5374     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5375     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5376 
5377     /* j-array */
5378     /*---------*/
5379     /*  post receives of j-array */
5380     for (i=0; i<nrecvs; i++) {
5381       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5382       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5383     }
5384 
5385     /* pack the outgoing message j-array */
5386     k = 0;
5387     for (i=0; i<nsends; i++) {
5388       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5389       bufJ  = bufj+sstartsj[i];
5390       for (j=0; j<nrows; j++) {
5391         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5392         for (ll=0; ll<sbs; ll++) {
5393           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5394           for (l=0; l<ncols; l++) {
5395             *bufJ++ = cols[l];
5396           }
5397           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5398         }
5399       }
5400       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5401     }
5402 
5403     /* recvs and sends of j-array are completed */
5404     i = nrecvs;
5405     while (i--) {
5406       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5407     }
5408     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5409   } else if (scall == MAT_REUSE_MATRIX) {
5410     sstartsj = *startsj_s;
5411     rstartsj = *startsj_r;
5412     bufa     = *bufa_ptr;
5413     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5414     b_otha   = b_oth->a;
5415   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5416 
5417   /* a-array */
5418   /*---------*/
5419   /*  post receives of a-array */
5420   for (i=0; i<nrecvs; i++) {
5421     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5422     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5423   }
5424 
5425   /* pack the outgoing message a-array */
5426   k = 0;
5427   for (i=0; i<nsends; i++) {
5428     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5429     bufA  = bufa+sstartsj[i];
5430     for (j=0; j<nrows; j++) {
5431       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5432       for (ll=0; ll<sbs; ll++) {
5433         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5434         for (l=0; l<ncols; l++) {
5435           *bufA++ = vals[l];
5436         }
5437         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5438       }
5439     }
5440     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5441   }
5442   /* recvs and sends of a-array are completed */
5443   i = nrecvs;
5444   while (i--) {
5445     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5446   }
5447   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5448   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5449 
5450   if (scall == MAT_INITIAL_MATRIX) {
5451     /* put together the new matrix */
5452     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5453 
5454     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5455     /* Since these are PETSc arrays, change flags to free them as necessary. */
5456     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5457     b_oth->free_a  = PETSC_TRUE;
5458     b_oth->free_ij = PETSC_TRUE;
5459     b_oth->nonew   = 0;
5460 
5461     ierr = PetscFree(bufj);CHKERRQ(ierr);
5462     if (!startsj_s || !bufa_ptr) {
5463       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5464       ierr = PetscFree(bufa);CHKERRQ(ierr);
5465     } else {
5466       *startsj_s = sstartsj;
5467       *startsj_r = rstartsj;
5468       *bufa_ptr  = bufa;
5469     }
5470   }
5471   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5472   PetscFunctionReturn(0);
5473 }
5474 
5475 /*@C
5476   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5477 
5478   Not Collective
5479 
5480   Input Parameter:
5481 . A - The matrix in mpiaij format
5482 
5483   Output Parameters:
5484 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5485 . colmap - A map from global column index to local index into lvec
5486 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5487 
5488   Level: developer
5489 
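  Example usage (a minimal sketch assuming a build without PETSC_USE_CTABLE; the variable names are hypothetical):
.vb
  Vec        lvec;
  PetscInt   *colmap;
  VecScatter Mvctx;
  MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);
  ... the returned objects are owned by A and must not be destroyed by the caller ...
.ve
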
5490 @*/
5491 #if defined(PETSC_USE_CTABLE)
5492 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5493 #else
5494 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5495 #endif
5496 {
5497   Mat_MPIAIJ *a;
5498 
5499   PetscFunctionBegin;
5500   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5501   PetscValidPointer(lvec, 2);
5502   PetscValidPointer(colmap, 3);
5503   PetscValidPointer(multScatter, 4);
5504   a = (Mat_MPIAIJ*) A->data;
5505   if (lvec) *lvec = a->lvec;
5506   if (colmap) *colmap = a->colmap;
5507   if (multScatter) *multScatter = a->Mvctx;
5508   PetscFunctionReturn(0);
5509 }
5510 
5511 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5512 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5513 #if defined(PETSC_HAVE_MKL_SPARSE)
5514 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5515 #endif
5516 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5517 #if defined(PETSC_HAVE_ELEMENTAL)
5518 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5519 #endif
5520 #if defined(PETSC_HAVE_HYPRE)
5521 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5522 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5523 #endif
5524 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5525 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5526 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5527 
5528 /*
5529     Computes C = A*B as (B'*A')' since computing A*B directly is untenable
5530 
5531                n                       p                          p
5532         (              )       (              )         (                  )
5533       m (      A       )  *  n (       B      )   =   m (         C        )
5534         (              )       (              )         (                  )
5535 
5536 */
5537 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5538 {
5539   PetscErrorCode ierr;
5540   Mat            At,Bt,Ct;
5541 
5542   PetscFunctionBegin;
5543   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5544   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5545   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5546   ierr = MatDestroy(&At);CHKERRQ(ierr);
5547   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5548   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5549   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5550   PetscFunctionReturn(0);
5551 }
5552 
5553 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5554 {
5555   PetscErrorCode ierr;
5556   PetscInt       m=A->rmap->n,n=B->cmap->n;
5557   Mat            Cmat;
5558 
5559   PetscFunctionBegin;
5560   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5561   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5562   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5563   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5564   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5565   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5566   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5567   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5568 
5569   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5570 
5571   *C = Cmat;
5572   PetscFunctionReturn(0);
5573 }
5574 
5575 /* ----------------------------------------------------------------*/
5576 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5577 {
5578   PetscErrorCode ierr;
5579 
5580   PetscFunctionBegin;
5581   if (scall == MAT_INITIAL_MATRIX) {
5582     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5583     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5584     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5585   }
5586   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5587   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5588   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5589   PetscFunctionReturn(0);
5590 }
5591 
5592 /*MC
5593    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5594 
5595    Options Database Keys:
5596 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5597 
5598   Level: beginner
5599 
5600 .seealso: MatCreateAIJ()
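   Example usage (a minimal sketch; comm, m, n, M, N, d_nz, and o_nz are hypothetical placeholders):
.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetType(A,MATMPIAIJ);
   MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);
.ve
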
5601 M*/
5602 
5603 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5604 {
5605   Mat_MPIAIJ     *b;
5606   PetscErrorCode ierr;
5607   PetscMPIInt    size;
5608 
5609   PetscFunctionBegin;
5610   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5611 
5612   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5613   B->data       = (void*)b;
5614   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5615   B->assembled  = PETSC_FALSE;
5616   B->insertmode = NOT_SET_VALUES;
5617   b->size       = size;
5618 
5619   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5620 
5621   /* build cache for off array entries formed */
5622   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5623 
5624   b->donotstash  = PETSC_FALSE;
5625   b->colmap      = 0;
5626   b->garray      = 0;
5627   b->roworiented = PETSC_TRUE;
5628 
5629   /* stuff used for matrix vector multiply */
5630   b->lvec  = NULL;
5631   b->Mvctx = NULL;
5632 
5633   /* stuff for MatGetRow() */
5634   b->rowindices   = 0;
5635   b->rowvalues    = 0;
5636   b->getrowactive = PETSC_FALSE;
5637 
5638   /* flexible pointer used in CUSP/CUSPARSE classes */
5639   b->spptr = NULL;
5640 
5641   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5642   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5643   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5644   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5645   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5646   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5647   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5648   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5649   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5650 #if defined(PETSC_HAVE_MKL_SPARSE)
5651   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5652 #endif
5653   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5654   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5655 #if defined(PETSC_HAVE_ELEMENTAL)
5656   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5657 #endif
5658 #if defined(PETSC_HAVE_HYPRE)
5659   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5660 #endif
5661   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
5662   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5663   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5664   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5665   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5666 #if defined(PETSC_HAVE_HYPRE)
5667   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5668 #endif
5669   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
5670   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5671   PetscFunctionReturn(0);
5672 }
5673 
5674 /*@C
5675      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5676          and "off-diagonal" parts of the matrix in CSR format.
5677 
5678    Collective on MPI_Comm
5679 
5680    Input Parameters:
5681 +  comm - MPI communicator
5682 .  m - number of local rows (Cannot be PETSC_DECIDE)
5683 .  n - This value should be the same as the local size used in creating the
5684        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5685        calculated if N is given). For square matrices n is almost always m.
5686 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5687 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5688 .   i - row indices for "diagonal" portion of matrix
5689 .   j - column indices for "diagonal" portion of matrix
5690 .   a - matrix values
5691 .   oi - row indices for "off-diagonal" portion of matrix
5692 .   oj - column indices for "off-diagonal" portion of matrix
5693 -   oa - matrix values
5694 
5695    Output Parameter:
5696 .   mat - the matrix
5697 
5698    Level: advanced
5699 
5700    Notes:
5701        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5702        must free the arrays once the matrix has been destroyed and not before.
5703 
5704        The i, j, oi, and oj indices are 0-based
5705 
5706        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5707 
5708        This sets local rows and cannot be used to set off-processor values.
5709 
5710        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5711        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5712        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5713        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5714        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5715        communication if it is known that only local entries will be set.
5716 
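       Example usage (a hypothetical sketch, not taken from a PETSc example; the values and variable names are chosen
       only for illustration): with two processes the 4 by 4 global matrix
.vb
            1  2  |  3  0
            4  5  |  0  6
            -------------
            0  7  |  8  9
           10  0  |  0 11
.ve
       can be assembled as follows, with process 0 owning rows 0-1 (diagonal block columns 0-1) and process 1 owning
       rows 2-3 (diagonal block columns 2-3). Note that j holds local column indices of the "diagonal" block while oj
       holds global column indices, matching the two MatCreateSeqAIJWithArrays() calls in the implementation below.
.vb
       PetscInt    i[3],j[4],oi[3],oj[2];
       PetscScalar a[4],oa[2];
       PetscMPIInt rank;
       Mat         A;

       MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
       if (!rank) {
         i[0]=0; i[1]=2; i[2]=4;    j[0]=0; j[1]=1; j[2]=0; j[3]=1;  a[0]=1; a[1]=2; a[2]=4; a[3]=5;
         oi[0]=0; oi[1]=1; oi[2]=2; oj[0]=2; oj[1]=3;                oa[0]=3; oa[1]=6;
       } else {
         i[0]=0; i[1]=2; i[2]=3;    j[0]=0; j[1]=1; j[2]=1;          a[0]=8; a[1]=9; a[2]=11;
         oi[0]=0; oi[1]=1; oi[2]=2; oj[0]=1; oj[1]=0;                oa[0]=7; oa[1]=10;
       }
       MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,i,j,a,oi,oj,oa,&A);
.ve
       The arrays must remain valid until A has been destroyed, as noted above.
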
5717 .keywords: matrix, aij, compressed row, sparse, parallel
5718 
5719 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5720           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5721 @*/
5722 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5723 {
5724   PetscErrorCode ierr;
5725   Mat_MPIAIJ     *maij;
5726 
5727   PetscFunctionBegin;
5728   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5729   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5730   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5731   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5732   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5733   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5734   maij = (Mat_MPIAIJ*) (*mat)->data;
5735 
5736   (*mat)->preallocated = PETSC_TRUE;
5737 
5738   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5739   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5740 
5741   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5742   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5743 
5744   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5745   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5746   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5747   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5748 
5749   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5750   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5751   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5752   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5753   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5754   PetscFunctionReturn(0);
5755 }
5756 
5757 /*
5758     Special version for direct calls from Fortran
5759 */
5760 #include <petsc/private/fortranimpl.h>
5761 
5762 /* Change these macros so they can be used in a void function */
5763 #undef CHKERRQ
5764 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5765 #undef SETERRQ2
5766 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5767 #undef SETERRQ3
5768 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5769 #undef SETERRQ
5770 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5771 
5772 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5773 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5774 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5775 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5776 #else
5777 #endif
5778 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5779 {
5780   Mat            mat  = *mmat;
5781   PetscInt       m    = *mm, n = *mn;
5782   InsertMode     addv = *maddv;
5783   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5784   PetscScalar    value;
5785   PetscErrorCode ierr;
5786 
5787   MatCheckPreallocated(mat,1);
5788   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5789 
5790 #if defined(PETSC_USE_DEBUG)
5791   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5792 #endif
5793   {
5794     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5795     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5796     PetscBool roworiented = aij->roworiented;
5797 
5798     /* Some variables required by the MatSetValues_SeqAIJ_*_Private() macros below */
5799     Mat        A                 = aij->A;
5800     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5801     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5802     MatScalar  *aa               = a->a;
5803     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5804     Mat        B                 = aij->B;
5805     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5806     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5807     MatScalar  *ba               = b->a;
5808 
5809     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5810     PetscInt  nonew = a->nonew;
5811     MatScalar *ap1,*ap2;
5812 
5813     PetscFunctionBegin;
5814     for (i=0; i<m; i++) {
5815       if (im[i] < 0) continue;
5816 #if defined(PETSC_USE_DEBUG)
5817       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5818 #endif
5819       if (im[i] >= rstart && im[i] < rend) {
5820         row      = im[i] - rstart;
5821         lastcol1 = -1;
5822         rp1      = aj + ai[row];
5823         ap1      = aa + ai[row];
5824         rmax1    = aimax[row];
5825         nrow1    = ailen[row];
5826         low1     = 0;
5827         high1    = nrow1;
5828         lastcol2 = -1;
5829         rp2      = bj + bi[row];
5830         ap2      = ba + bi[row];
5831         rmax2    = bimax[row];
5832         nrow2    = bilen[row];
5833         low2     = 0;
5834         high2    = nrow2;
5835 
5836         for (j=0; j<n; j++) {
5837           if (roworiented) value = v[i*n+j];
5838           else value = v[i+j*m];
5839           if (in[j] >= cstart && in[j] < cend) {
5840             col = in[j] - cstart;
5841             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5842             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5843           } else if (in[j] < 0) continue;
5844 #if defined(PETSC_USE_DEBUG)
5845           /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
5846           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5847 #endif
5848           else {
5849             if (mat->was_assembled) {
5850               if (!aij->colmap) {
5851                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5852               }
5853 #if defined(PETSC_USE_CTABLE)
5854               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5855               col--;
5856 #else
5857               col = aij->colmap[in[j]] - 1;
5858 #endif
5859               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5860               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5861                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5862                 col  =  in[j];
5863                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5864                 B     = aij->B;
5865                 b     = (Mat_SeqAIJ*)B->data;
5866                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5867                 rp2   = bj + bi[row];
5868                 ap2   = ba + bi[row];
5869                 rmax2 = bimax[row];
5870                 nrow2 = bilen[row];
5871                 low2  = 0;
5872                 high2 = nrow2;
5873                 bm    = aij->B->rmap->n;
5874                 ba    = b->a;
5875               }
5876             } else col = in[j];
5877             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5878           }
5879         }
5880       } else if (!aij->donotstash) {
5881         if (roworiented) {
5882           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5883         } else {
5884           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5885         }
5886       }
5887     }
5888   }
5889   PetscFunctionReturnVoid();
5890 }
5891 
5892