xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision ca9cdca734cf4f1d31e92cffb2b9ba44d2fb8be9)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/isimpl.h>
6 #include <petscblaslapack.h>
7 #include <petscsf.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when
23    enough exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46 
47 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
48 {
49   PetscErrorCode ierr;
50   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
51 
52   PetscFunctionBegin;
53   if (mat->A) {
54     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
55     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
56   }
57   PetscFunctionReturn(0);
58 }
59 
/*
   Builds an IS containing the global indices of the locally owned rows that
   have at least one stored entry with a nonzero VALUE in either the diagonal
   (A) or off-diagonal (B) part. If no process in the communicator has a zero
   row, *keptrows is left NULL so callers can cheaply detect "keep everything".
*/
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  /* first pass: cnt = number of locally zero rows (structurally empty or all stored values exactly 0) */
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];  /* stored entries of row i in the diagonal part */
    nb = ib[i+1] - ib[i];  /* stored entries of row i in the off-diagonal part */
    if (!na && !nb) {      /* structurally empty row */
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;  /* found a nonzero value: the row is kept */
    }
    bb = b->a + ib[i];
    for (j=0; j <nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;                 /* entries exist but all values are exactly zero */
ok1:;
  }
  /* n0rows = global count of zero rows; when zero, return with *keptrows == NULL */
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  /* second pass: record the global index of every kept (nonzero) row */
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na;j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;  /* convert local row to global numbering */
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  /* the IS takes ownership of rows (PETSC_OWN_POINTER) */
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
120 
121 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
122 {
123   PetscErrorCode    ierr;
124   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
125   PetscBool         cong;
126 
127   PetscFunctionBegin;
128   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
129   if (Y->assembled && cong) {
130     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
131   } else {
132     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
133   }
134   PetscFunctionReturn(0);
135 }
136 
137 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
138 {
139   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
140   PetscErrorCode ierr;
141   PetscInt       i,rstart,nrows,*rows;
142 
143   PetscFunctionBegin;
144   *zrows = NULL;
145   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
146   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
147   for (i=0; i<nrows; i++) rows[i] += rstart;
148   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
149   PetscFunctionReturn(0);
150 }
151 
152 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
153 {
154   PetscErrorCode ierr;
155   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
156   PetscInt       i,n,*garray = aij->garray;
157   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
158   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
159   PetscReal      *work;
160 
161   PetscFunctionBegin;
162   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
163   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
164   if (type == NORM_2) {
165     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
166       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
167     }
168     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
169       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
170     }
171   } else if (type == NORM_1) {
172     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
173       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
174     }
175     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
176       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
177     }
178   } else if (type == NORM_INFINITY) {
179     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
180       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
181     }
182     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
183       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
184     }
185 
186   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
187   if (type == NORM_INFINITY) {
188     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
189   } else {
190     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
191   }
192   ierr = PetscFree(work);CHKERRQ(ierr);
193   if (type == NORM_2) {
194     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
195   }
196   PetscFunctionReturn(0);
197 }
198 
199 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
200 {
201   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
202   IS              sis,gis;
203   PetscErrorCode  ierr;
204   const PetscInt  *isis,*igis;
205   PetscInt        n,*iis,nsis,ngis,rstart,i;
206 
207   PetscFunctionBegin;
208   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
209   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
210   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
211   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
212   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
213   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
214 
215   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
216   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
217   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
218   n    = ngis + nsis;
219   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
220   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
221   for (i=0; i<n; i++) iis[i] += rstart;
222   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
223 
224   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
225   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
226   ierr = ISDestroy(&sis);CHKERRQ(ierr);
227   ierr = ISDestroy(&gis);CHKERRQ(ierr);
228   PetscFunctionReturn(0);
229 }
230 
231 /*
232     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
233     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
234 
235     Only for square matrices
236 
237     Used by a preconditioner, hence PETSC_EXTERN
238 */
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* gmat is only read on process 0; verify its type there */
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    /* create the parallel matrix with m local rows/columns on this process */
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    /* broadcast rank 0's block sizes so every process sets the same values */
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    /* prefix-sum the gathered local sizes into ownership ranges */
    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      /* ld[i] = number of entries of row i strictly left of the diagonal block (reused in the MAT_REUSE path) */
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      /* rank 0 uses its own slices of gmata directly (not freed below) */
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation: dlens currently holds total row lengths, so split off the off-diagonal part */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    /* restore dlens to full row lengths for the insertion loop below */
    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    /* only non-root ranks allocated their own receive buffers */
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    /* stash the left-of-diagonal counts for the fast MAT_REUSE path */
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0*/
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat;
       each row arrives as [left-of-diagonal B | A | right-of-diagonal B], with
       ld[i] giving the left-of-diagonal count recorded during MAT_INITIAL_MATRIX */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      /* right part of previous row plus left part of this row, contiguous in Ao */
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      /* trailing right-of-diagonal part of the last row */
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
400 
/*
  Local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
a slightly higher hash table cost; without it it is not scalable (each processor
has an order N integer array but is fast to access).
*/
408 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
409 {
410   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
411   PetscErrorCode ierr;
412   PetscInt       n = aij->B->cmap->n,i;
413 
414   PetscFunctionBegin;
415   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
416 #if defined(PETSC_USE_CTABLE)
417   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
418   for (i=0; i<n; i++) {
419     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
420   }
421 #else
422   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
423   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
424   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
425 #endif
426   PetscFunctionReturn(0);
427 }
428 
/*
  Inserts or adds a single value at local (row,col) of the diagonal (A) SeqAIJ
  block. The caller must have set up: rp1/ap1 (column-index/value arrays of the
  row), nrow1/rmax1 (used/allocated row length), low1/high1 (binary-search
  window, reused across calls for monotonically increasing columns via
  lastcol1), and nonew (a->nonew). orow/ocol are the GLOBAL indices, used only
  in error messages. Zero values are dropped off-diagonal when
  ignorezeroentries is set; nonew==1 silently skips new locations, nonew==-1
  errors on them. Falls through to the a_noinsert label when nothing is
  inserted; otherwise shifts later entries up and grows the row.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) ap1[_i] += value;   \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
      if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp1[ii+1] = rp1[ii]; \
        ap1[ii+1] = ap1[ii]; \
      } \
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      A->nonzerostate++;\
      a_noinsert: ; \
      ailen[row] = nrow1; \
}
463 
/*
  Companion of MatSetValues_SeqAIJ_A_Private for the off-diagonal (B) SeqAIJ
  block, using the rp2/ap2/nrow2/rmax2/low2/high2/lastcol2 working set. Unlike
  the A variant, the zero-drop test has no "row != col" exception: diagonal
  entries never live in B, so every zero may be dropped when
  ignorezeroentries is set. Falls through to b_noinsert when nothing is
  inserted.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col;                                       \
    while (high2-low2 > 5) {                              \
      t = (low2+high2)/2;                                 \
      if (rp2[t] > col) high2 = t;                        \
      else             low2  = t;                         \
    }                                                     \
    for (_i=low2; _i<high2; _i++) {                       \
      if (rp2[_i] > col) break;                           \
      if (rp2[_i] == col) {                               \
        if (addv == ADD_VALUES) ap2[_i] += value;         \
        else                    ap2[_i] = value;          \
        goto b_noinsert;                                  \
      }                                                   \
    }                                                     \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;                    \
    /* shift up all the later entries in this row */      \
    for (ii=N; ii>=_i; ii--) {                            \
      rp2[ii+1] = rp2[ii];                                \
      ap2[ii+1] = ap2[ii];                                \
    }                                                     \
    rp2[_i] = col;                                        \
    ap2[_i] = value;                                      \
    B->nonzerostate++;                                    \
    b_noinsert: ;                                         \
    bilen[row] = nrow2;                                   \
  }
498 
/*
   Overwrites the numerical values of an entire locally owned row, where v
   lists the row's stored values ordered by increasing GLOBAL column index:
   off-diagonal entries left of the diagonal block, then the diagonal (A)
   block, then off-diagonal entries to the right. Only values are changed;
   the nonzero structure must already exist.
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);  /* diag = rstart = first local row (== column split for square matrices) */
  row  = row - diag;  /* convert the global row number to a local index */
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    /* B's columns are sorted by global index; stop at the first one right of the diagonal block */
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
524 
/*
   MatSetValues for MPIAIJ: inserts/adds the m-by-n logically dense block of
   values v at global rows im[] and columns in[]. Locally owned rows are
   dispatched entry-by-entry into the diagonal (A) or off-diagonal (B) SeqAIJ
   block via the MatSetValues_SeqAIJ_{A,B}_Private macros; rows owned by other
   processes are stashed for communication at assembly time. Negative row or
   column indices are silently ignored.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A                 = aij->A;
  Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa               = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B                 = aij->B;
  Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba               = b->a;

  /* working set shared with the insertion macros (search windows, row pointers) */
  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;  /* negative rows are ignored */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: prime the macro working set for both blocks */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];  /* column-oriented input layout */
        if (in[j] >= cstart && in[j] < cend) {
          /* column in the diagonal block: local column index */
          col   = in[j] - cstart;
          nonew = a->nonew;
          /* zeros may be dropped when adding, except on the diagonal */
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;  /* negative columns are ignored */
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          /* off-diagonal column */
          if (mat->was_assembled) {
            /* B uses compacted local column numbering after assembly; translate via colmap */
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;  /* stored 1-based; col < 0 means not present */
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              /* new off-diagonal location: revert B to global numbering so it can grow */
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) {
              /* location not in the pattern and B disallows new nonzeros */
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
              } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
            }
          } else col = in[j];  /* before first assembly B uses global column numbers directly */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      /* off-process row: stash for communication during assembly */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
634 
/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
640 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
641 {
642   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
643   Mat            A           = aij->A; /* diagonal part of the matrix */
644   Mat            B           = aij->B; /* offdiagonal part of the matrix */
645   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
646   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
647   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
648   PetscInt       *ailen      = a->ilen,*aj = a->j;
649   PetscInt       *bilen      = b->ilen,*bj = b->j;
650   PetscInt       am          = aij->A->rmap->n,j;
651   PetscInt       diag_so_far = 0,dnz;
652   PetscInt       offd_so_far = 0,onz;
653 
654   PetscFunctionBegin;
655   /* Iterate over all rows of the matrix */
656   for (j=0; j<am; j++) {
657     dnz = onz = 0;
658     /*  Iterate over all non-zero columns of the current row */
659     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
660       /* If column is in the diagonal */
661       if (mat_j[col] >= cstart && mat_j[col] < cend) {
662         aj[diag_so_far++] = mat_j[col] - cstart;
663         dnz++;
664       } else { /* off-diagonal entries */
665         bj[offd_so_far++] = mat_j[col];
666         onz++;
667       }
668     }
669     ailen[j] = dnz;
670     bilen[j] = onz;
671   }
672   PetscFunctionReturn(0);
673 }
674 
/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
682 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
683 {
684   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
685   Mat            A      = aij->A; /* diagonal part of the matrix */
686   Mat            B      = aij->B; /* offdiagonal part of the matrix */
687   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
688   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
689   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
690   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
691   PetscInt       *ailen = a->ilen,*aj = a->j;
692   PetscInt       *bilen = b->ilen,*bj = b->j;
693   PetscInt       am     = aij->A->rmap->n,j;
694   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
695   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
696   PetscScalar    *aa = a->a,*ba = b->a;
697 
698   PetscFunctionBegin;
699   /* Iterate over all rows of the matrix */
700   for (j=0; j<am; j++) {
701     dnz_row = onz_row = 0;
702     rowstart_offd = full_offd_i[j];
703     rowstart_diag = full_diag_i[j];
704     /*  Iterate over all non-zero columns of the current row */
705     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
706       /* If column is in the diagonal */
707       if (mat_j[col] >= cstart && mat_j[col] < cend) {
708         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
709         aa[rowstart_diag+dnz_row] = mat_a[col];
710         dnz_row++;
711       } else { /* off-diagonal entries */
712         bj[rowstart_offd+onz_row] = mat_j[col];
713         ba[rowstart_offd+onz_row] = mat_a[col];
714         onz_row++;
715       }
716     }
717     ailen[j] = dnz_row;
718     bilen[j] = onz_row;
719   }
720   PetscFunctionReturn(0);
721 }
722 
723 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
724 {
725   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
726   PetscErrorCode ierr;
727   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
728   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
729 
730   PetscFunctionBegin;
731   for (i=0; i<m; i++) {
732     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
733     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
734     if (idxm[i] >= rstart && idxm[i] < rend) {
735       row = idxm[i] - rstart;
736       for (j=0; j<n; j++) {
737         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
738         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
739         if (idxn[j] >= cstart && idxn[j] < cend) {
740           col  = idxn[j] - cstart;
741           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
742         } else {
743           if (!aij->colmap) {
744             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
745           }
746 #if defined(PETSC_USE_CTABLE)
747           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
748           col--;
749 #else
750           col = aij->colmap[idxn[j]] - 1;
751 #endif
752           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
753           else {
754             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
755           }
756         }
757       }
758     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
759   }
760   PetscFunctionReturn(0);
761 }
762 
763 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
764 
765 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
766 {
767   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
768   PetscErrorCode ierr;
769   PetscInt       nstash,reallocs;
770 
771   PetscFunctionBegin;
772   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
773 
774   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
775   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
776   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
777   PetscFunctionReturn(0);
778 }
779 
/*
   Completes assembly of the parallel AIJ matrix: drains the stash of
   entries that other ranks sent to this process, assembles the two
   sequential blocks, collectively handles disassembly/reassembly of the
   off-diagonal block, and updates the global nonzero state.
   Collective on the matrix communicator.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* Receive every stash message destined for this rank and insert its values */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    /* MPI_PROD of the was_assembled flags: false on any rank makes the result false */
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: set up the off-process multiply machinery (ghost scatter) */
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  /* scratch space for MatGetRow is invalidated by the new values */
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  /* the cached diagonal is stale after assembly */
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
850 
851 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
852 {
853   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
854   PetscErrorCode ierr;
855 
856   PetscFunctionBegin;
857   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
858   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
859   PetscFunctionReturn(0);
860 }
861 
/*
   Zeros the given global rows of the parallel matrix, optionally placing
   'diag' on the diagonal of each zeroed row.  When both x and b are
   provided, b is adjusted so that b = diag*x on the zeroed rows.
   Collective on A (rows may be owned by any process).
*/
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
  PetscInt      *lrows;
  PetscInt       r, len;
  PetscBool      cong;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }
  /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  if ((diag != 0.0) && cong) {
    /* row and column layouts match, so the diagonal entry lives in the
       sequential diagonal block and can be set by MatZeroRows directly */
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    /* non-congruent layouts: zero first, then insert the diagonal entries
       with MatSetValues (they may fall in the off-diagonal block) */
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
910 
/*
   Zeros the given global rows AND the corresponding columns of the
   parallel matrix, placing 'diag' on the diagonal of each zeroed row.
   When x and b are provided, b is updated to account for the eliminated
   columns.  A PetscSF is used to map the (arbitrary-owner) input rows to
   their owning ranks; a scattered 0/1 mask marks which ghost columns of
   the off-diagonal block must be eliminated.  Collective on A.
*/
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[r] >= 0 ? (lrows[len++] = r) : 0;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  /* scatter the 0/1 mask to the ghost layout of the off-diagonal block */
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    /* NOTE(review): b is dereferenced below whenever x is given; this assumes
       callers pass x and b together -- confirm */
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* eliminated column: move its contribution to the right-hand side */
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1023 
1024 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1025 {
1026   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1027   PetscErrorCode ierr;
1028   PetscInt       nt;
1029   VecScatter     Mvctx = a->Mvctx;
1030 
1031   PetscFunctionBegin;
1032   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1033   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1034 
1035   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1036   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1037   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1038   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1039   PetscFunctionReturn(0);
1040 }
1041 
1042 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1043 {
1044   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1045   PetscErrorCode ierr;
1046 
1047   PetscFunctionBegin;
1048   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1049   PetscFunctionReturn(0);
1050 }
1051 
1052 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1053 {
1054   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1055   PetscErrorCode ierr;
1056   VecScatter     Mvctx = a->Mvctx;
1057 
1058   PetscFunctionBegin;
1059   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1060   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1061   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1062   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1063   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1064   PetscFunctionReturn(0);
1065 }
1066 
/*
   yy = A^T xx for the parallel matrix: the off-diagonal block's transpose
   product is computed into the ghost vector and scattered back (reverse
   mode, adding) onto the owning ranks, while the diagonal block's
   transpose product is computed locally.  The statement order differs
   depending on whether the scatter context merges Begin/End (see below).
*/
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line, */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* merged scatters complete in Begin(), so yy must already hold the
       local product before the scatter adds into it */
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1095 
/*
   Sets *f to PETSC_TRUE if Bmat equals Amat^T to tolerance tol.
   The cheap test (local diagonal blocks transposes of each other) is run
   first; only when it passes are the off-process parts gathered with
   MatCreateSubMatrices and compared.
   NOTE(review): 'notme' is allocated with size N-last+first (column count
   based) but filled using the row bounds first/last/M; this appears to
   assume the row and column layouts coincide (square, congruent
   matrices) -- confirm before relying on it for rectangular layouts.
*/
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  /* 'notme' collects all indices outside the local ownership range */
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  /* Aoff = A(Me,Notme) should equal Boff^T = B(Notme,Me)^T */
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1135 
1136 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1137 {
1138   PetscErrorCode ierr;
1139 
1140   PetscFunctionBegin;
1141   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1146 {
1147   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1148   PetscErrorCode ierr;
1149 
1150   PetscFunctionBegin;
1151   /* do nondiagonal part */
1152   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1153   /* send it on its way */
1154   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1155   /* do local part */
1156   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1157   /* receive remote parts */
1158   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1159   PetscFunctionReturn(0);
1160 }
1161 
1162 /*
1163   This only works correctly for square matrices where the subblock A->A is the
1164    diagonal block
1165 */
1166 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1167 {
1168   PetscErrorCode ierr;
1169   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1170 
1171   PetscFunctionBegin;
1172   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1173   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1174   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1175   PetscFunctionReturn(0);
1176 }
1177 
1178 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1179 {
1180   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1181   PetscErrorCode ierr;
1182 
1183   PetscFunctionBegin;
1184   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1185   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1186   PetscFunctionReturn(0);
1187 }
1188 
/*
   Destroys the parallel AIJ matrix: releases the two sequential blocks,
   the column map, ghost vector and scatters, then clears the type name
   and every composed function pointer registered on the object.
*/
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
  /* the colmap has two storage forms depending on configuration */
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  /* remove every composed method so a retype of the object starts clean */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1235 
1236 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1237 {
1238   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1239   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1240   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1241   PetscErrorCode ierr;
1242   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1243   int            fd;
1244   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1245   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1246   PetscScalar    *column_values;
1247   PetscInt       message_count,flowcontrolcount;
1248   FILE           *file;
1249 
1250   PetscFunctionBegin;
1251   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1252   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1253   nz   = A->nz + B->nz;
1254   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1255   if (!rank) {
1256     header[0] = MAT_FILE_CLASSID;
1257     header[1] = mat->rmap->N;
1258     header[2] = mat->cmap->N;
1259 
1260     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1261     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1262     /* get largest number of rows any processor has */
1263     rlen  = mat->rmap->n;
1264     range = mat->rmap->range;
1265     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1266   } else {
1267     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1268     rlen = mat->rmap->n;
1269   }
1270 
1271   /* load up the local row counts */
1272   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1273   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1274 
1275   /* store the row lengths to the file */
1276   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1277   if (!rank) {
1278     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1279     for (i=1; i<size; i++) {
1280       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1281       rlen = range[i+1] - range[i];
1282       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1283       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1284     }
1285     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1286   } else {
1287     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1288     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1289     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1290   }
1291   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1292 
1293   /* load up the local column indices */
1294   nzmax = nz; /* th processor needs space a largest processor needs */
1295   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1296   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1297   cnt   = 0;
1298   for (i=0; i<mat->rmap->n; i++) {
1299     for (j=B->i[i]; j<B->i[i+1]; j++) {
1300       if ((col = garray[B->j[j]]) > cstart) break;
1301       column_indices[cnt++] = col;
1302     }
1303     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1304     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1305   }
1306   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1307 
1308   /* store the column indices to the file */
1309   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1310   if (!rank) {
1311     MPI_Status status;
1312     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1313     for (i=1; i<size; i++) {
1314       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1315       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1316       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1317       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1318       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1319     }
1320     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1321   } else {
1322     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1323     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1324     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1325     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1326   }
1327   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1328 
1329   /* load up the local column values */
1330   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1331   cnt  = 0;
1332   for (i=0; i<mat->rmap->n; i++) {
1333     for (j=B->i[i]; j<B->i[i+1]; j++) {
1334       if (garray[B->j[j]] > cstart) break;
1335       column_values[cnt++] = B->a[j];
1336     }
1337     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1338     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1339   }
1340   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1341 
1342   /* store the column values to the file */
1343   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1344   if (!rank) {
1345     MPI_Status status;
1346     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1347     for (i=1; i<size; i++) {
1348       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1349       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1350       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1351       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1352       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1353     }
1354     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1355   } else {
1356     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1357     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1358     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1359     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1360   }
1361   ierr = PetscFree(column_values);CHKERRQ(ierr);
1362 
1363   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1364   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1365   PetscFunctionReturn(0);
1366 }
1367 
1368 #include <petscdraw.h>
/*
   MatView_MPIAIJ_ASCIIorDraworSocket - Views a parallel AIJ matrix with an
   ASCII, draw, binary, or socket viewer.

   For ASCII info formats the routine prints per-process summaries and returns
   early; for all remaining cases (including ASCII matrix dumps, draw, and
   socket) it first assembles the entire matrix onto process 0 and views that
   sequential copy there.

   Collective on Mat; all ranks must call (the tail gather/draw section is
   synchronized across the communicator).
*/
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* Report min/avg/max nonzero counts across ranks; local count is the
         sum over the diagonal (A) and off-diagonal (B) blocks */
      PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
      ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
      ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      for (i=0; i<(PetscInt)size; i++) {
        nmax = PetscMax(nmax,nz[i]);
        nmin = PetscMin(nmin,nz[i]);
        navg += nz[i];
      }
      ierr = PetscFree(nz);CHKERRQ(ierr);
      navg = navg/size;
      ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      /* Per-process detail: local sizes, nonzeros, memory, I-node usage */
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      /* Nothing to print for factor info on an unfactored AIJ matrix */
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      /* Single process: the diagonal block IS the whole matrix */
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    /* Rank 0 owns all M x N entries of the gathered copy; other ranks own none */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    /* Temporarily shift local column indices to global for MatSetValues() */
    for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    /* Undo the shift so the original matrix is left unmodified */
    for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
    ct   = cols;
    /* garray maps compressed off-diagonal column indices to global columns */
    for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      /* The gathered matrix's local (rank 0) diagonal block holds everything */
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1513 
1514 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1515 {
1516   PetscErrorCode ierr;
1517   PetscBool      iascii,isdraw,issocket,isbinary;
1518 
1519   PetscFunctionBegin;
1520   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1521   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1522   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1523   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1524   if (iascii || isdraw || isbinary || issocket) {
1525     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1526   }
1527   PetscFunctionReturn(0);
1528 }
1529 
/*
   MatSOR_MPIAIJ - SOR/relaxation for a parallel AIJ matrix.

   Only the "local" sweep variants are supported in parallel: each iteration
   scatters the current solution to the ghost vector, folds the off-diagonal
   contribution B*x into the right-hand side, and then runs the sequential SOR
   kernel on the diagonal block A.  True parallel (global) SOR is rejected
   with PETSC_ERR_SUP.

   Input:  matin - the MATMPIAIJ matrix; bb - right-hand side; omega, flag,
           fshift, its, lits - standard SOR parameters.
   Output: xx - updated approximate solution.
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;        /* work vector for the modified RHS; allocated lazily below */
  PetscBool      hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    /* Apply-upper is delegated entirely to the sequential kernel on A */
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* A work RHS is needed unless this is a single zero-initial-guess sweep;
     note (~flag & SOR_ZERO_INITIAL_GUESS) tests that the bit is NOT set */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* First sweep with x = 0 needs no ghost update: B*x vanishes */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      /* Gather ghost values of the current iterate */
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    /* Eisenstat's trick: one backward sweep, then a forward sweep on a
       modified RHS, summing the two partial solutions */
    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      /* Cache the matrix diagonal; reused across calls */
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    /* bb1 = bb + ((omega-2)/omega) * D*x */
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(&xx1);CHKERRQ(ierr);
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");

  ierr = VecDestroy(&bb1);CHKERRQ(ierr);

  /* Propagate any zero-pivot/factorization error detected on the local block */
  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}
1629 
/*
   MatPermute_MPIAIJ - Creates B = P_r * A * P_c for row permutation rowp and
   column permutation colp.

   The permutation index sets give, for each local row/column, its source
   index; PetscSF reductions are used to invert them (find where each local
   row/column should GO) and to translate the off-diagonal ghost columns.

   Input:  A - the MATMPIAIJ matrix; rowp, colp - row/column permutation ISs.
   Output: B - the newly created permuted matrix.
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            aA,aB,Aperm;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = NULL;
  PetscBool      done;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
  /* work is shared by the row and column inversions, hence max(m,n) */
  ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);

  /* Invert row permutation to find out where my rows should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
  /* Reducing each leaf's own global index onto its root yields the inverse map */
  ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);

  /* Invert column permutation to find out where my columns should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
  ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);

  /* Find out where my gcols should go */
  ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
  ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  /* Broadcast the owners' cdest values to the ghost columns */
  ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  /* Count diagonal/off-diagonal nonzeros of each permuted row (d/o nnz),
     then ship the counts to the rows' destination ranks (td/to nnz) */
  ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt row = rdest[i],rowner;
    ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
    for (j=ai[i]; j<ai[i+1]; j++) {
      PetscInt cowner,col = cdest[aj[j]];
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j=bi[i]; j<bi[i+1]; j++) {
      PetscInt cowner,col = gcdest[bj[j]];
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);

  ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt j0,rowlen;
    rowlen = ai[i+1] - ai[i];
    for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
    rowlen = bi[i+1] - bi[i];
    for (j0=j=0; j<rowlen; j0=j) {
      for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
  ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
  ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
  ierr = PetscFree(gcdest);CHKERRQ(ierr);
  /* parcolp is never assigned in this function, so this destroy is a no-op here */
  if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
  *B = Aperm;
  PetscFunctionReturn(0);
}
1733 
1734 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1735 {
1736   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1737   PetscErrorCode ierr;
1738 
1739   PetscFunctionBegin;
1740   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1741   if (ghosts) *ghosts = aij->garray;
1742   PetscFunctionReturn(0);
1743 }
1744 
1745 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1746 {
1747   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1748   Mat            A    = mat->A,B = mat->B;
1749   PetscErrorCode ierr;
1750   PetscReal      isend[5],irecv[5];
1751 
1752   PetscFunctionBegin;
1753   info->block_size = 1.0;
1754   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1755 
1756   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1757   isend[3] = info->memory;  isend[4] = info->mallocs;
1758 
1759   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1760 
1761   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1762   isend[3] += info->memory;  isend[4] += info->mallocs;
1763   if (flag == MAT_LOCAL) {
1764     info->nz_used      = isend[0];
1765     info->nz_allocated = isend[1];
1766     info->nz_unneeded  = isend[2];
1767     info->memory       = isend[3];
1768     info->mallocs      = isend[4];
1769   } else if (flag == MAT_GLOBAL_MAX) {
1770     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1771 
1772     info->nz_used      = irecv[0];
1773     info->nz_allocated = irecv[1];
1774     info->nz_unneeded  = irecv[2];
1775     info->memory       = irecv[3];
1776     info->mallocs      = irecv[4];
1777   } else if (flag == MAT_GLOBAL_SUM) {
1778     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1779 
1780     info->nz_used      = irecv[0];
1781     info->nz_allocated = irecv[1];
1782     info->nz_unneeded  = irecv[2];
1783     info->memory       = irecv[3];
1784     info->mallocs      = irecv[4];
1785   }
1786   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1787   info->fill_ratio_needed = 0;
1788   info->factor_mallocs    = 0;
1789   PetscFunctionReturn(0);
1790 }
1791 
1792 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1793 {
1794   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1795   PetscErrorCode ierr;
1796 
1797   PetscFunctionBegin;
1798   switch (op) {
1799   case MAT_NEW_NONZERO_LOCATIONS:
1800   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1801   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1802   case MAT_KEEP_NONZERO_PATTERN:
1803   case MAT_NEW_NONZERO_LOCATION_ERR:
1804   case MAT_USE_INODES:
1805   case MAT_IGNORE_ZERO_ENTRIES:
1806     MatCheckPreallocated(A,1);
1807     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1808     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1809     break;
1810   case MAT_ROW_ORIENTED:
1811     MatCheckPreallocated(A,1);
1812     a->roworiented = flg;
1813 
1814     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1815     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1816     break;
1817   case MAT_NEW_DIAGONALS:
1818     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1819     break;
1820   case MAT_IGNORE_OFF_PROC_ENTRIES:
1821     a->donotstash = flg;
1822     break;
1823   case MAT_SPD:
1824     A->spd_set = PETSC_TRUE;
1825     A->spd     = flg;
1826     if (flg) {
1827       A->symmetric                  = PETSC_TRUE;
1828       A->structurally_symmetric     = PETSC_TRUE;
1829       A->symmetric_set              = PETSC_TRUE;
1830       A->structurally_symmetric_set = PETSC_TRUE;
1831     }
1832     break;
1833   case MAT_SYMMETRIC:
1834     MatCheckPreallocated(A,1);
1835     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1836     break;
1837   case MAT_STRUCTURALLY_SYMMETRIC:
1838     MatCheckPreallocated(A,1);
1839     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1840     break;
1841   case MAT_HERMITIAN:
1842     MatCheckPreallocated(A,1);
1843     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1844     break;
1845   case MAT_SYMMETRY_ETERNAL:
1846     MatCheckPreallocated(A,1);
1847     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1848     break;
1849   case MAT_SUBMAT_SINGLEIS:
1850     A->submat_singleis = flg;
1851     break;
1852   case MAT_STRUCTURE_ONLY:
1853     /* The option is handled directly by MatSetOption() */
1854     break;
1855   default:
1856     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1857   }
1858   PetscFunctionReturn(0);
1859 }
1860 
/*
   MatGetRow_MPIAIJ - Returns one locally owned row of a parallel AIJ matrix
   with global column indices, merging the diagonal (A) and off-diagonal (B)
   block entries in increasing column order.

   Input:  matin - the matrix; row - global row number (must be locally owned).
   Output: nz - number of nonzeros in the row; idx - global column indices
           (optional); v - values (optional).  idx/v point into internal work
           arrays valid until MatRestoreRow().
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  /* Only one row may be "gotten" at a time */
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;    /* local row index within this process's block */

  /* Request columns/values from the blocks only if the caller wants them */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  /* cmap translates B's compressed column indices to global columns */
  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column precedes the
         diagonal block (i.e. < cstart); those come first, then all of A,
         then the remaining B entries */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already determined while copying values */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          /* values were not requested; determine imark here */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        /* A's indices are local to the diagonal block; shift by cstart */
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1938 
1939 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1940 {
1941   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1942 
1943   PetscFunctionBegin;
1944   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1945   aij->getrowactive = PETSC_FALSE;
1946   PetscFunctionReturn(0);
1947 }
1948 
/*
   MatNorm_MPIAIJ - Computes the Frobenius, 1- (max column sum), or
   infinity- (max row sum) norm of a parallel AIJ matrix.

   On a single process the computation is delegated to the sequential block;
   otherwise each process accumulates over its diagonal (A) and off-diagonal
   (B) blocks and the result is combined with an Allreduce.  The 2-norm is
   not supported.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* Sum |a_ij|^2 over both local blocks, reduce, then take the root */
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = PetscSqrtReal(*norm);
      ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
    } else if (type == NORM_1) { /* max column norm */
      /* Accumulate |a_ij| per GLOBAL column, sum across ranks, take the max */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
      *norm = 0.0;
      v     = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        /* A stores block-local columns; shift by cstart to get global */
        tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        /* B stores compressed columns; garray maps them to global */
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* Rows are not split across processes, so a local row-sum max followed
         by a global max reduction suffices */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}
2015 
/*
   MatTranspose_MPIAIJ - Forms the transpose of a parallel AIJ matrix.

   For MAT_INITIAL_MATRIX (or an in-place request where *matout == A) a new
   matrix is created with preallocation computed via a PetscSF reduction of
   per-column counts; for MAT_REUSE_MATRIX the entries are inserted into the
   caller-provided *matout.  The transpose is built by inserting each local
   row of A (and of B, with garray-translated columns) as a COLUMN of the
   result via MatSetValues().

   NOTE: the column indices of the local blocks are temporarily shifted to
   global numbering in place and restored afterwards (for the A block).
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
  PetscInt       cstart = A->cmap->rstart,ncol;
  Mat            B;
  MatScalar      *array;

  PetscFunctionBegin;
  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt             *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode          *oloc;
    PETSC_UNUSED PetscSF sf;

    ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
    /* compute d_nnz for preallocation */
    ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) {
      d_nnz[aj[i]]++;          /* each entry in column j of A becomes an entry in row j of B^T's diagonal block */
      aj[i] += cstart; /* global col index to be used by MatSetValues() */
    }
    /* compute local off-diagonal contributions */
    ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    /* Reduce the per-ghost-column counts onto their owning processes to
       obtain this process's off-diagonal preallocation o_nnz */
    ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
    ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

    /* B has A's column layout as rows and row layout as columns */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
  } else {
    B    = *matout;
    ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
  }

  /* copy over the A part */
  /* Inserting 1 column x ncol rows transposes each row of A into a column of B */
  array = Aloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<ma; i++) {
    ncol = ai[i+1]-ai[i];
    ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    array += ncol; aj += ncol;
  }
  aj = Aloc->j;
  for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */

  /* copy over the B part */
  /* Translate B's compressed column indices to global via garray first */
  ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
  array = Bloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    array += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* In-place transpose: replace A's contents with B's and destroy the shell */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2101 
/* Computes mat = diag(ll)*mat*diag(rr); either vector may be NULL (treated as identity) */
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a    = aij->A,b = aij->B; /* local diagonal and off-diagonal blocks */
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); /* s2 = local rows, s3 = local cols */
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* ll only touches local rows, so the off-diagonal block can be left-scaled immediately */
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale  the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2132 
2133 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2134 {
2135   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2136   PetscErrorCode ierr;
2137 
2138   PetscFunctionBegin;
2139   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2140   PetscFunctionReturn(0);
2141 }
2142 
2143 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2144 {
2145   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2146   Mat            a,b,c,d;
2147   PetscBool      flg;
2148   PetscErrorCode ierr;
2149 
2150   PetscFunctionBegin;
2151   a = matA->A; b = matA->B;
2152   c = matB->A; d = matB->B;
2153 
2154   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2155   if (flg) {
2156     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2157   }
2158   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2159   PetscFunctionReturn(0);
2160 }
2161 
2162 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2163 {
2164   PetscErrorCode ierr;
2165   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2166   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2167 
2168   PetscFunctionBegin;
2169   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2170   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2171     /* because of the column compression in the off-processor part of the matrix a->B,
2172        the number of columns in a->B and b->B may be different, hence we cannot call
2173        the MatCopy() directly on the two parts. If need be, we can provide a more
2174        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2175        then copying the submatrices */
2176     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2177   } else {
2178     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2179     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2180   }
2181   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2182   PetscFunctionReturn(0);
2183 }
2184 
/* Default MatSetUp(): preallocate using PETSc's default per-row nonzero estimates */
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2193 
/*
   Computes the number of nonzeros per row needed for preallocation when X and Y
   have different nonzero structure.

   For each of the m rows, counts the size of the union of the column sets of X
   and Y.  xltog/yltog map the (possibly compressed) local column indices of X
   and Y to global indices so that the two lists can be merged in a common
   numbering.  Assumes both column lists are sorted ascending in global
   numbering -- the two-pointer merge below relies on it.
*/
PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
{
  PetscInt       i,j,k,nzx,nzy;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix */
  for (i=0; i<m; i++) {
    const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; /* column lists of row i */
    nzx = xi[i+1] - xi[i];
    nzy = yi[i+1] - yi[i];
    nnz[i] = 0;
    for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
      for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
      if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
      nnz[i]++;
    }
    for (; k<nzy; k++) nnz[i]++;                  /* Columns present only in Y */
  }
  PetscFunctionReturn(0);
}
2218 
/* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
{
  PetscErrorCode ierr;
  PetscInt       m = Y->rmap->N; /* Y is a sequential block here, so global N is the local row count -- TODO confirm */
  Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
  Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;

  PetscFunctionBegin;
  ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2231 
/* Computes Y = a*X + Y */
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    /* Identical sparsity: the aligned value arrays of corresponding blocks can
       be combined directly with BLAS axpy */
    PetscScalar alpha = a;
    x    = (Mat_SeqAIJ*)xx->A->data;
    ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
    y    = (Mat_SeqAIJ*)yy->A->data;
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    x    = (Mat_SeqAIJ*)xx->B->data;
    y    = (Mat_SeqAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  } else {
    /* Different patterns: preallocate a new matrix for the union of the two
       patterns, compute the sum into it, then replace Y's internals with it */
    Mat      B;
    PetscInt *nnz_d,*nnz_o;
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2273 
2274 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2275 
2276 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2277 {
2278 #if defined(PETSC_USE_COMPLEX)
2279   PetscErrorCode ierr;
2280   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2281 
2282   PetscFunctionBegin;
2283   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2284   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2285 #else
2286   PetscFunctionBegin;
2287 #endif
2288   PetscFunctionReturn(0);
2289 }
2290 
2291 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2292 {
2293   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2294   PetscErrorCode ierr;
2295 
2296   PetscFunctionBegin;
2297   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2298   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2299   PetscFunctionReturn(0);
2300 }
2301 
2302 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2303 {
2304   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2305   PetscErrorCode ierr;
2306 
2307   PetscFunctionBegin;
2308   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2309   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2310   PetscFunctionReturn(0);
2311 }
2312 
/* Computes v[i] = entry of local row i with the largest absolute value; if idx
   is non-NULL, idx[i] receives that entry's global column index */
PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  /* Row maxima of the diagonal block; idx gets block-local column indices */
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      /* shift nonzero results into global column numbering */
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  /* Row maxima of the off-diagonal block, into a local scratch vector */
  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  /* Keep whichever of the two block results has the larger magnitude;
     garray maps off-diagonal block columns to global columns */
  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2350 
2351 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2352 {
2353   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2354   PetscErrorCode ierr;
2355   PetscInt       i,*idxb = 0;
2356   PetscScalar    *va,*vb;
2357   Vec            vtmp;
2358 
2359   PetscFunctionBegin;
2360   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2361   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2362   if (idx) {
2363     for (i=0; i<A->cmap->n; i++) {
2364       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2365     }
2366   }
2367 
2368   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2369   if (idx) {
2370     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2371   }
2372   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2373   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2374 
2375   for (i=0; i<A->rmap->n; i++) {
2376     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2377       va[i] = vb[i];
2378       if (idx) idx[i] = a->garray[idxb[i]];
2379     }
2380   }
2381 
2382   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2383   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2384   ierr = PetscFree(idxb);CHKERRQ(ierr);
2385   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2386   PetscFunctionReturn(0);
2387 }
2388 
2389 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2390 {
2391   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2392   PetscInt       n      = A->rmap->n;
2393   PetscInt       cstart = A->cmap->rstart;
2394   PetscInt       *cmap  = mat->garray;
2395   PetscInt       *diagIdx, *offdiagIdx;
2396   Vec            diagV, offdiagV;
2397   PetscScalar    *a, *diagA, *offdiagA;
2398   PetscInt       r;
2399   PetscErrorCode ierr;
2400 
2401   PetscFunctionBegin;
2402   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2403   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
2404   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
2405   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2406   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2407   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2408   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2409   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2410   for (r = 0; r < n; ++r) {
2411     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2412       a[r]   = diagA[r];
2413       idx[r] = cstart + diagIdx[r];
2414     } else {
2415       a[r]   = offdiagA[r];
2416       idx[r] = cmap[offdiagIdx[r]];
2417     }
2418   }
2419   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2420   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2421   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2422   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2423   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2424   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2425   PetscFunctionReturn(0);
2426 }
2427 
/* Computes v[r] = entry of local row r with the largest absolute value and
   idx[r] = its global column index.  NOTE(review): idx is dereferenced
   unconditionally; unlike MatGetRowMaxAbs_MPIAIJ there is no NULL check. */
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
  PetscInt       n      = A->rmap->n;      /* number of local rows */
  PetscInt       cstart = A->cmap->rstart; /* first global column of the diagonal block */
  PetscInt       *cmap  = mat->garray;     /* off-diagonal block column -> global column */
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  /* Local scratch vectors for the per-block row maxima */
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  /* Per row keep the block result with the larger magnitude; ties favor the
     diagonal block */
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2466 
2467 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2468 {
2469   PetscErrorCode ierr;
2470   Mat            *dummy;
2471 
2472   PetscFunctionBegin;
2473   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2474   *newmat = *dummy;
2475   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2476   PetscFunctionReturn(0);
2477 }
2478 
2479 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2480 {
2481   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2482   PetscErrorCode ierr;
2483 
2484   PetscFunctionBegin;
2485   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2486   A->factorerrortype = a->A->factorerrortype;
2487   PetscFunctionReturn(0);
2488 }
2489 
2490 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2491 {
2492   PetscErrorCode ierr;
2493   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2494 
2495   PetscFunctionBegin;
2496   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2497   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2498   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2499   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2500   PetscFunctionReturn(0);
2501 }
2502 
2503 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2504 {
2505   PetscFunctionBegin;
2506   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2507   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2508   PetscFunctionReturn(0);
2509 }
2510 
2511 /*@
2512    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2513 
2514    Collective on Mat
2515 
2516    Input Parameters:
2517 +    A - the matrix
2518 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2519 
2520  Level: advanced
2521 
2522 @*/
2523 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2524 {
2525   PetscErrorCode       ierr;
2526 
2527   PetscFunctionBegin;
2528   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2529   PetscFunctionReturn(0);
2530 }
2531 
2532 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2533 {
2534   PetscErrorCode       ierr;
2535   PetscBool            sc = PETSC_FALSE,flg;
2536 
2537   PetscFunctionBegin;
2538   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2539   ierr = PetscObjectOptionsBegin((PetscObject)A);CHKERRQ(ierr);
2540   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2541   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2542   if (flg) {
2543     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2544   }
2545   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2546   PetscFunctionReturn(0);
2547 }
2548 
/* Computes Y = Y + a*I */
PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
  Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    /* Not yet preallocated: reserve one entry per row for the diagonal */
    ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
  } else if (!aij->nz) {
    /* Preallocated but the diagonal block holds no nonzeros: re-preallocate
       one entry per row, restoring the nonew flag the call resets */
    PetscInt nonew = aij->nonew;
    ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
    aij->nonew = nonew;
  }
  ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2566 
2567 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2568 {
2569   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2570   PetscErrorCode ierr;
2571 
2572   PetscFunctionBegin;
2573   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2574   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2575   if (d) {
2576     PetscInt rstart;
2577     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2578     *d += rstart;
2579 
2580   }
2581   PetscFunctionReturn(0);
2582 }
2583 
2584 
2585 /* -------------------------------------------------------------------*/
/* Function table for MATMPIAIJ: entry k implements Mat operation k (see the
   MatOperation ordering in the PETSc Mat headers).  A 0 entry means the
   operation is unsupported or inherited.  The numbered comments label every
   few slots; the table is positional, so entries must not be reordered. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                /*10*/ 0,
                                       0,
                                       0,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*29*/ MatSetUp_MPIAIJ,
                                       0,
                                       0,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       0,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetRandom_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       0,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       0,
                                /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       0,
                                       MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
                                /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       0,
                                       0,
                                       MatFindZeroDiagonals_MPIAIJ,
                                /*80*/ 0,
                                       0,
                                       0,
                                /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
                                       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       MatPtAP_MPIAIJ_MPIAIJ,
                                       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
                                /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*99*/ 0,
                                       0,
                                       0,
                                       MatConjugate_MPIAIJ,
                                       0,
                                /*104*/MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       0,
                                       0,
                                /*109*/0,
                                       0,
                                       MatGetRowMin_MPIAIJ,
                                       0,
                                       MatMissingDiagonal_MPIAIJ,
                                /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
                                       0,
                                       MatGetGhosts_MPIAIJ,
                                       0,
                                       0,
                                /*119*/0,
                                       0,
                                       0,
                                       0,
                                       MatGetMultiProcBlock_MPIAIJ,
                                /*124*/MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnNorms_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       0,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                /*129*/0,
                                       MatTransposeMatMult_MPIAIJ_MPIAIJ,
                                       MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       0,
                                /*134*/0,
                                       0,
                                       MatRARt_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                /*139*/MatSetBlockSizes_MPIAIJ,
                                       0,
                                       0,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
};
2732 
2733 /* ----------------------------------------------------------------------------------------*/
2734 
2735 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2736 {
2737   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2738   PetscErrorCode ierr;
2739 
2740   PetscFunctionBegin;
2741   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2742   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2743   PetscFunctionReturn(0);
2744 }
2745 
2746 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2747 {
2748   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2749   PetscErrorCode ierr;
2750 
2751   PetscFunctionBegin;
2752   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2753   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2754   PetscFunctionReturn(0);
2755 }
2756 
2757 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2758 {
2759   Mat_MPIAIJ     *b;
2760   PetscErrorCode ierr;
2761 
2762   PetscFunctionBegin;
2763   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2764   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2765   b = (Mat_MPIAIJ*)B->data;
2766 
2767 #if defined(PETSC_USE_CTABLE)
2768   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2769 #else
2770   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2771 #endif
2772   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2773   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2774   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2775 
2776   /* Because the B will have been resized we simply destroy it and create a new one each time */
2777   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2778   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2779   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2780   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2781   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2782   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2783 
2784   if (!B->preallocated) {
2785     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2786     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2787     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2788     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2789     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2790   }
2791 
2792   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2793   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2794   B->preallocated  = PETSC_TRUE;
2795   B->was_assembled = PETSC_FALSE;
2796   B->assembled     = PETSC_FALSE;;
2797   PetscFunctionReturn(0);
2798 }
2799 
2800 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2801 {
2802   Mat_MPIAIJ     *b;
2803   PetscErrorCode ierr;
2804 
2805   PetscFunctionBegin;
2806   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2807   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2808   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2809   b = (Mat_MPIAIJ*)B->data;
2810 
2811 #if defined(PETSC_USE_CTABLE)
2812   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2813 #else
2814   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2815 #endif
2816   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2817   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2818   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2819 
2820   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2821   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2822   B->preallocated  = PETSC_TRUE;
2823   B->was_assembled = PETSC_FALSE;
2824   B->assembled = PETSC_FALSE;
2825   PetscFunctionReturn(0);
2826 }
2827 
/*
   MatDuplicate_MPIAIJ - Duplicates an MPIAIJ matrix, including its internal
   communication structures (lvec, Mvctx, colmap, garray), so the copy is
   immediately usable without a reassembly.

   Input Parameters:
+  matin    - the matrix to duplicate
-  cpvalues - MAT_COPY_VALUES or MAT_DO_NOT_COPY_VALUES (forwarded to the
              MatDuplicate() calls on the sequential blocks)

   Output Parameter:
.  newmat - the duplicate
*/
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  /* copy the entire operations table so any type-specific overrides survive */
  ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a       = (Mat_MPIAIJ*)mat->data;

  mat->factortype   = matin->factortype;
  mat->assembled    = PETSC_TRUE;    /* the copy is marked assembled immediately */
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;               /* transient row-access scratch starts out empty */
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  /* duplicate the global-to-local column map of the off-diagonal block */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  /* duplicate the local-to-global column map (garray) of the off-diagonal block */
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
  } else a->garray = 0;

  /* duplicate ghost-value work vector and its scatter context */
  ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  if (oldmat->Mvctx_mpi1) {
    ierr    = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
    ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
  }

  /* deep-copy the sequential diagonal and off-diagonal blocks */
  ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
2894 
/*
   MatLoad_MPIAIJ - Loads a matrix stored in PETSc binary format into an
   MPIAIJ matrix.

   Only process 0 reads from the file: it reads the header, the row lengths,
   the column indices and the numerical values, and ships each other process
   its portion with MPIULong_Send(); all other processes only receive
   (MPIULong_Recv()).  Row/column ownership is either taken from the sizes
   already set on newMat or computed here (PETSC_DECIDE).
*/
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscScalar    *vals,*svals;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
  PetscInt       cend,cstart,n,*rowners;
  int            fd;
  PetscInt       bs = newMat->rmap->bs;

  PetscFunctionBegin;
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  /* header = [classid, M, N, flag]; a negative flag marks a storage format this loader cannot read */
  if (!rank) {
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ");
  }

  ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  if (bs < 0) bs = 1;

  /* broadcast global sizes read from the header (M rows, N columns) */
  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M    = header[1]; N = header[2];

  /* If global sizes are set, check if they are consistent with that given in the file */
  if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
  if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);

  /* determine ownership of all (block) rows */
  if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
  if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
  else m = newMat->rmap->n; /* Set by user */

  ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* First process needs enough room for process with most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<=size; i++) {
      mmax = PetscMax(mmax, rowners[i]);
    }
  } else mmax = -1;             /* unused, but compilers complain */

  /* prefix-sum row counts into ownership offsets; rowners[1] is already rank 0's count */
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
    ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += ourlens[j];
    }
    for (i=1; i<size; i++) {
      ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<rowners[i+1]-rowners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz   = procsnz[0];
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);

    /* receive message of column indices*/
    ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
    else n = newMat->cmap->n;
    ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* split each row count into diagonal part (ourlens) and off-diagonal part (offlens) */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);

  if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}

  ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);

  /* restore ourlens to full row lengths for the value-insertion loops below */
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3096 
3097 /* Not scalable because of ISAllGather() unless getting all columns. */
3098 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3099 {
3100   PetscErrorCode ierr;
3101   IS             iscol_local;
3102   PetscBool      isstride;
3103   PetscMPIInt    lisstride=0,gisstride;
3104 
3105   PetscFunctionBegin;
3106   /* check if we are grabbing all columns*/
3107   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3108 
3109   if (isstride) {
3110     PetscInt  start,len,mstart,mlen;
3111     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3112     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3113     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3114     if (mstart == start && mlen-mstart == len) lisstride = 1;
3115   }
3116 
3117   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3118   if (gisstride) {
3119     PetscInt N;
3120     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3121     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3122     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3123     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3124   } else {
3125     PetscInt cbs;
3126     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3127     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3128     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3129   }
3130 
3131   *isseq = iscol_local;
3132   PetscFunctionReturn(0);
3133 }
3134 
/*
 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
 (see MatCreateSubMatrix_MPIAIJ_nonscalable)

 Input Parameters:
   mat - matrix
   isrow - parallel row index set; its local indices are a subset of local rows of mat,
           i.e., mat->rstart <= isrow[i] < mat->rend
   iscol - parallel column index set; its local indices are a subset of local columns of mat,
           i.e., mat->cstart <= iscol[i] < mat->cend
 Output Parameters:
   isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
   iscol_o - sequential column index set for retrieving mat->B
   garray - column map; garray[i] indicates global location of iscol_o[i] in iscol;
            allocated here, ownership passes to the caller (freed with PetscFree())
 */
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
{
  PetscErrorCode ierr;
  Vec            x,cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray,*cmaparray;
  PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            B=a->B;
  Vec            lvec=a->lvec,lcmap;
  PetscInt       i,cstart,cend,Bn=B->cmap->N;
  MPI_Comm       comm;
  VecScatter     Mvctx=a->Mvctx;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x;
     a second vector cmap records, at the same positions, each selected column's
     global index within the submatrix */
  ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
  ierr = VecSet(x,-1.0);CHKERRQ(ierr);
  ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
  ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);

  /* Get start indices: isstart = global offset of this process's first iscol entry */
  ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
  isstart -= ncols;
  ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);

  ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
  ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
  ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
  ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
  for (i=0; i<ncols; i++) {
    xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
    idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
  }
  ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
  ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);

  /* Get iscol_d (ownership of idx passes to the IS via PETSC_OWN_POINTER) */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
  ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
  ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);

  /* Get isrow_d: shift global row indices to local numbering */
  ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
  rstart = mat->rmap->rstart;
  ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
  for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
  ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);

  ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
  ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
  ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);

  ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices: entries of lvec left at -1 were not selected by iscol */
  count = 0;
  ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
  ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);

  ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
  ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
  for (i=0; i<Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]     = i;                   /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
      count++;
    }
  }
  ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
  ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);

  ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
  /* cannot ensure iscol_o has same blocksize as iscol! */

  ierr = PetscFree(idx);CHKERRQ(ierr);
  *garray = cmap1;

  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&cmap);CHKERRQ(ierr);
  ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3247 
3248 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3249 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3250 {
3251   PetscErrorCode ierr;
3252   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3253   Mat            M = NULL;
3254   MPI_Comm       comm;
3255   IS             iscol_d,isrow_d,iscol_o;
3256   Mat            Asub = NULL,Bsub = NULL;
3257   PetscInt       n;
3258 
3259   PetscFunctionBegin;
3260   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3261 
3262   if (call == MAT_REUSE_MATRIX) {
3263     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3264     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3265     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3266 
3267     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3268     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3269 
3270     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3271     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3272 
3273     /* Update diagonal and off-diagonal portions of submat */
3274     asub = (Mat_MPIAIJ*)(*submat)->data;
3275     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3276     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3277     if (n) {
3278       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3279     }
3280     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3281     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3282 
3283   } else { /* call == MAT_INITIAL_MATRIX) */
3284     const PetscInt *garray;
3285     PetscInt        BsubN;
3286 
3287     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3288     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3289 
3290     /* Create local submatrices Asub and Bsub */
3291     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3292     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3293 
3294     /* Create submatrix M */
3295     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3296 
3297     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3298     asub = (Mat_MPIAIJ*)M->data;
3299 
3300     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3301     n = asub->B->cmap->N;
3302     if (BsubN > n) {
3303       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3304       const PetscInt *idx;
3305       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3306       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3307 
3308       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3309       j = 0;
3310       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3311       for (i=0; i<n; i++) {
3312         if (j >= BsubN) break;
3313         while (subgarray[i] > garray[j]) j++;
3314 
3315         if (subgarray[i] == garray[j]) {
3316           idx_new[i] = idx[j++];
3317         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]);
3318       }
3319       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3320 
3321       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3322       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3323 
3324     } else if (BsubN < n) {
3325       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N);
3326     }
3327 
3328     ierr = PetscFree(garray);CHKERRQ(ierr);
3329     *submat = M;
3330 
3331     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3332     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3333     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3334 
3335     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3336     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3337 
3338     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3339     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3340   }
3341   PetscFunctionReturn(0);
3342 }
3343 
3344 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3345 {
3346   PetscErrorCode ierr;
3347   IS             iscol_local=NULL,isrow_d;
3348   PetscInt       csize;
3349   PetscInt       n,i,j,start,end;
3350   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3351   MPI_Comm       comm;
3352 
3353   PetscFunctionBegin;
3354   /* If isrow has same processor distribution as mat,
3355      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3356   if (call == MAT_REUSE_MATRIX) {
3357     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3358     if (isrow_d) {
3359       sameRowDist  = PETSC_TRUE;
3360       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3361     } else {
3362       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3363       if (iscol_local) {
3364         sameRowDist  = PETSC_TRUE;
3365         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3366       }
3367     }
3368   } else {
3369     /* Check if isrow has same processor distribution as mat */
3370     sameDist[0] = PETSC_FALSE;
3371     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3372     if (!n) {
3373       sameDist[0] = PETSC_TRUE;
3374     } else {
3375       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3376       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3377       if (i >= start && j < end) {
3378         sameDist[0] = PETSC_TRUE;
3379       }
3380     }
3381 
3382     /* Check if iscol has same processor distribution as mat */
3383     sameDist[1] = PETSC_FALSE;
3384     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3385     if (!n) {
3386       sameDist[1] = PETSC_TRUE;
3387     } else {
3388       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3389       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3390       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3391     }
3392 
3393     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3394     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3395     sameRowDist = tsameDist[0];
3396   }
3397 
3398   if (sameRowDist) {
3399     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3400       /* isrow and iscol have same processor distribution as mat */
3401       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3402       PetscFunctionReturn(0);
3403     } else { /* sameRowDist */
3404       /* isrow has same processor distribution as mat */
3405       if (call == MAT_INITIAL_MATRIX) {
3406         PetscBool sorted;
3407         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3408         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3409         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3410         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i);
3411 
3412         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3413         if (sorted) {
3414           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3415           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3416           PetscFunctionReturn(0);
3417         }
3418       } else { /* call == MAT_REUSE_MATRIX */
3419         IS    iscol_sub;
3420         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3421         if (iscol_sub) {
3422           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3423           PetscFunctionReturn(0);
3424         }
3425       }
3426     }
3427   }
3428 
3429   /* General case: iscol -> iscol_local which has global size of iscol */
3430   if (call == MAT_REUSE_MATRIX) {
3431     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3432     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3433   } else {
3434     if (!iscol_local) {
3435       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3436     }
3437   }
3438 
3439   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3440   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3441 
3442   if (call == MAT_INITIAL_MATRIX) {
3443     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3444     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3445   }
3446   PetscFunctionReturn(0);
3447 }
3448 
3449 /*@C
3450      MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3451          and "off-diagonal" part of the matrix in CSR format.
3452 
3453    Collective on MPI_Comm
3454 
3455    Input Parameters:
3456 +  comm - MPI communicator
3457 .  A - "diagonal" portion of matrix
3458 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3459 -  garray - global index of B columns
3460 
3461    Output Parameter:
.   mat - the matrix, with input A as its local diagonal matrix

   Level: advanced
3464 
3465    Notes:
3466        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3467        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3468 
3469 .seealso: MatCreateMPIAIJWithSplitArrays()
3470 @*/
3471 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3472 {
3473   PetscErrorCode ierr;
3474   Mat_MPIAIJ     *maij;
3475   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3476   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3477   PetscScalar    *oa=b->a;
3478   Mat            Bnew;
3479   PetscInt       m,n,N;
3480 
3481   PetscFunctionBegin;
3482   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3483   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3484   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3485   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3486   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3487   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3488 
3489   /* Get global columns of mat */
3490   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3491 
3492   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3493   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3494   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3495   maij = (Mat_MPIAIJ*)(*mat)->data;
3496 
3497   (*mat)->preallocated = PETSC_TRUE;
3498 
3499   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3500   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3501 
3502   /* Set A as diagonal portion of *mat */
3503   maij->A = A;
3504 
3505   nz = oi[m];
3506   for (i=0; i<nz; i++) {
3507     col   = oj[i];
3508     oj[i] = garray[col];
3509   }
3510 
3511    /* Set Bnew as off-diagonal portion of *mat */
3512   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3513   bnew        = (Mat_SeqAIJ*)Bnew->data;
3514   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3515   maij->B     = Bnew;
3516 
3517   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N);
3518 
3519   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3520   b->free_a       = PETSC_FALSE;
3521   b->free_ij      = PETSC_FALSE;
3522   ierr = MatDestroy(&B);CHKERRQ(ierr);
3523 
3524   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3525   bnew->free_a       = PETSC_TRUE;
3526   bnew->free_ij      = PETSC_TRUE;
3527 
3528   /* condense columns of maij->B */
3529   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3530   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3531   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3532   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3533   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3534   PetscFunctionReturn(0);
3535 }
3536 
3537 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3538 
/*
   MatCreateSubMatrix_MPIAIJ_SameRowDist - Extracts the parallel submatrix mat[isrow,iscol]
   when the requested rows keep the same distribution across processes as mat.

   Works in five phases (marked (1)-(5) below):
     (1)/(2) build a local column IS (iscol_sub) containing only columns this process touches,
             plus iscmap mapping those local columns to column positions in the submatrix;
     (3) extract a sequential submatrix Msub with MatCreateSubMatrices_MPIAIJ_SingleIS_Local();
     (4) create and preallocate the parallel result (MAT_INITIAL_MATRIX only);
     (5) copy Msub's rows into the parallel result via MatSetValues_MPIAIJ().

   For MAT_REUSE_MATRIX the intermediate objects (Msub, iscol_sub, iscmap) are retrieved from
   the composed entries ("SubMatrix", "SubIScol", "Subcmap") stored on *newmat by a previous
   MAT_INITIAL_MATRIX call; see the PetscObjectCompose() calls at the end.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            M,Msub,B=a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray,*colsub,Ncols;
  PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
  IS             iscol_sub,iscmap;
  const PetscInt *is_idx,*cmap;
  PetscBool      allcolumns=PETSC_FALSE;
  MPI_Comm       comm;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    /* recover the work objects composed on *newmat by a previous MAT_INITIAL_MATRIX call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
    if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
    ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);

    ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
    if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");

    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
    if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");

    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);

  } else { /* call == MAT_INITIAL_MATRIX */
    PetscBool flg;

    ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
    ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    if (allcolumns) {
      /* all columns requested: iscol_sub is iscol_local itself, iscmap the identity */
      iscol_sub = iscol_local;
      ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx,*cmap1,k;
      ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
      ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
      ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
      count = 0;
      k     = 0;
      /* keep a column j only if it lies in this process's diagonal block [cstart,cend)
         or appears in garray[] (the off-diagonal column map); k advances monotonically
         through garray because is_idx is sorted */
      for (i=0; i<Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i;  /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn-1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);

      /* ownership of idx/cmap1 transfers to the index sets (PETSC_OWN_POINTER) */
      ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
      ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
      ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);

      ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
    }

    /* (3) Create sequential Msub */
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
  }

  ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Msub)->data;
  ii   = aij->i;
  ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt    rank,size;
    PetscInt       csize;

    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = Ncols/size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix sum of local column counts gives [rstart,rend) for this rank */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);

    /* next, compute all the lengths */
    jj    = aij->j;
    ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
    ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);

    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);

  } else { /* call == MAT_REUSE_MATRIX */
    M    = *newmat;
    ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
    if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);

  /* walk the CSR arrays of Msub, translating its local column indices through cmap */
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz  = ii[i+1] - ii[i];
    for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
    jj += nz; aa += nz;
  }
  ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);

  ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(colsub);CHKERRQ(ierr);

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    *newmat = M;
    ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
    ierr = MatDestroy(&Msub);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
    ierr = ISDestroy(&iscmap);CHKERRQ(ierr);

    if (iscol_local) {
      ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
      ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}
3747 
3748 /*
    Not great since it makes two copies of the submatrix, first a SeqAIJ
  on each process and then the end result by concatenating the local matrices.
3751   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3752 
3753   Note: This requires a sequential iscol with all indices.
3754 */
/*
   MatCreateSubMatrix_MPIAIJ_nonscalable - Extracts the parallel submatrix mat[isrow,iscol]
   by first building a sequential copy (Mreuse) per process and then re-assembling a
   parallel matrix from it (hence "nonscalable": iscol must contain all indices; see the
   comment above).

   csize is the requested local column size of the result, or PETSC_DECIDE to split the
   columns evenly (or to match the row layout for a square result).

   For MAT_REUSE_MATRIX the sequential copy is recovered from the "SubMatrix" entry
   composed on *newmat by a previous MAT_INITIAL_MATRIX call.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            M,Mreuse;
  MatScalar      *aa,*vwork;
  MPI_Comm       comm;
  Mat_SeqAIJ     *aij;
  PetscBool      colflag,allcolumns=PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* Check for special case: each processor gets entire matrix columns */
  ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;

  if (call ==  MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
  } else {
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix sum of local column counts gives [rstart,rend) for this rank */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  /* copy Mreuse's CSR rows into the parallel matrix; cwork/vwork point at
     successive row slices of the shared jj/aa arrays */
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
3876 
3877 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3878 {
3879   PetscInt       m,cstart, cend,j,nnz,i,d;
3880   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3881   const PetscInt *JJ;
3882   PetscScalar    *values;
3883   PetscErrorCode ierr;
3884   PetscBool      nooffprocentries;
3885 
3886   PetscFunctionBegin;
3887   if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3888 
3889   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3890   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3891   m      = B->rmap->n;
3892   cstart = B->cmap->rstart;
3893   cend   = B->cmap->rend;
3894   rstart = B->rmap->rstart;
3895 
3896   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3897 
3898 #if defined(PETSC_USE_DEBUG)
3899   for (i=0; i<m; i++) {
3900     nnz = Ii[i+1]- Ii[i];
3901     JJ  = J + Ii[i];
3902     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3903     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]);
3904     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3905   }
3906 #endif
3907 
3908   for (i=0; i<m; i++) {
3909     nnz     = Ii[i+1]- Ii[i];
3910     JJ      = J + Ii[i];
3911     nnz_max = PetscMax(nnz_max,nnz);
3912     d       = 0;
3913     for (j=0; j<nnz; j++) {
3914       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3915     }
3916     d_nnz[i] = d;
3917     o_nnz[i] = nnz - d;
3918   }
3919   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3920   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3921 
3922   if (v) values = (PetscScalar*)v;
3923   else {
3924     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3925   }
3926 
3927   for (i=0; i<m; i++) {
3928     ii   = i + rstart;
3929     nnz  = Ii[i+1]- Ii[i];
3930     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3931   }
3932   nooffprocentries    = B->nooffprocentries;
3933   B->nooffprocentries = PETSC_TRUE;
3934   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3935   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3936   B->nooffprocentries = nooffprocentries;
3937 
3938   if (!v) {
3939     ierr = PetscFree(values);CHKERRQ(ierr);
3940   }
3941   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3942   PetscFunctionReturn(0);
3943 }
3944 
3945 /*@
3946    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3947    (the default parallel PETSc format).
3948 
3949    Collective on MPI_Comm
3950 
3951    Input Parameters:
3952 +  B - the matrix
3953 .  i - the indices into j for the start of each local row (starts with zero)
3954 .  j - the column indices for each local row (starts with zero)
3955 -  v - optional values in the matrix
3956 
3957    Level: developer
3958 
3959    Notes:
3960        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3961      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3962      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3963 
3964        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3965 
3966        The format which is used for the sparse matrix input, is equivalent to a
3967     row-major ordering.. i.e for the following matrix, the input data expected is
3968     as shown
3969 
3970 $        1 0 0
3971 $        2 0 3     P0
3972 $       -------
3973 $        4 5 6     P1
3974 $
3975 $     Process0 [P0]: rows_owned=[0,1]
3976 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3977 $        j =  {0,0,2}  [size = 3]
3978 $        v =  {1,2,3}  [size = 3]
3979 $
3980 $     Process1 [P1]: rows_owned=[2]
3981 $        i =  {0,3}    [size = nrow+1  = 1+1]
3982 $        j =  {0,1,2}  [size = 3]
3983 $        v =  {4,5,6}  [size = 3]
3984 
3985 .keywords: matrix, aij, compressed row, sparse, parallel
3986 
3987 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3988           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3989 @*/
PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* dispatch to the implementation registered on B as "MatMPIAIJSetPreallocationCSR_C"
     (for MPIAIJ this is MatMPIAIJSetPreallocationCSR_MPIAIJ above) */
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3998 
3999 /*@C
4000    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4001    (the default parallel PETSc format).  For good matrix assembly performance
4002    the user should preallocate the matrix storage by setting the parameters
4003    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4004    performance can be increased by more than a factor of 50.
4005 
4006    Collective on MPI_Comm
4007 
4008    Input Parameters:
4009 +  B - the matrix
4010 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4011            (same value is used for all local rows)
4012 .  d_nnz - array containing the number of nonzeros in the various rows of the
4013            DIAGONAL portion of the local submatrix (possibly different for each row)
4014            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4015            The size of this array is equal to the number of local rows, i.e 'm'.
4016            For matrices that will be factored, you must leave room for (and set)
4017            the diagonal entry even if it is zero.
4018 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4019            submatrix (same value is used for all local rows).
4020 -  o_nnz - array containing the number of nonzeros in the various rows of the
4021            OFF-DIAGONAL portion of the local submatrix (possibly different for
4022            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4023            structure. The size of this array is equal to the number
4024            of local rows, i.e 'm'.
4025 
4026    If the *_nnz parameter is given then the *_nz parameter is ignored
4027 
4028    The AIJ format (also called the Yale sparse matrix format or
4029    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4030    storage.  The stored row and column indices begin with zero.
4031    See Users-Manual: ch_mat for details.
4032 
4033    The parallel matrix is partitioned such that the first m0 rows belong to
4034    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4035    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4036 
4037    The DIAGONAL portion of the local submatrix of a processor can be defined
4038    as the submatrix which is obtained by extraction the part corresponding to
4039    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4040    first row that belongs to the processor, r2 is the last row belonging to
4041    the this processor, and c1-c2 is range of indices of the local part of a
4042    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4043    common case of a square matrix, the row and column ranges are the same and
4044    the DIAGONAL part is also square. The remaining portion of the local
4045    submatrix (mxN) constitute the OFF-DIAGONAL portion.
4046 
4047    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4048 
4049    You can call MatGetInfo() to get information on how effective the preallocation was;
4050    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4051    You can also run with the option -info and look for messages with the string
4052    malloc in them to see if additional memory allocation was needed.
4053 
4054    Example usage:
4055 
4056    Consider the following 8x8 matrix with 34 non-zero values, that is
4057    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4058    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4059    as follows:
4060 
4061 .vb
4062             1  2  0  |  0  3  0  |  0  4
4063     Proc0   0  5  6  |  7  0  0  |  8  0
4064             9  0 10  | 11  0  0  | 12  0
4065     -------------------------------------
4066            13  0 14  | 15 16 17  |  0  0
4067     Proc1   0 18  0  | 19 20 21  |  0  0
4068             0  0  0  | 22 23  0  | 24  0
4069     -------------------------------------
4070     Proc2  25 26 27  |  0  0 28  | 29  0
4071            30  0  0  | 31 32 33  |  0 34
4072 .ve
4073 
4074    This can be represented as a collection of submatrices as:
4075 
4076 .vb
4077       A B C
4078       D E F
4079       G H I
4080 .ve
4081 
4082    Where the submatrices A,B,C are owned by proc0, D,E,F are
4083    owned by proc1, G,H,I are owned by proc2.
4084 
4085    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4086    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4087    The 'M','N' parameters are 8,8, and have the same values on all procs.
4088 
4089    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4090    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4091    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4092    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4093    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4095 
4096    When d_nz, o_nz parameters are specified, d_nz storage elements are
4097    allocated for every row of the local diagonal submatrix, and o_nz
4098    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
4100    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4101    In this case, the values of d_nz,o_nz are:
4102 .vb
4103      proc0 : dnz = 2, o_nz = 2
4104      proc1 : dnz = 3, o_nz = 2
4105      proc2 : dnz = 1, o_nz = 4
4106 .ve
4107    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4108    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
4110    34 values.
4111 
4112    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4114    In the above case the values for d_nnz,o_nnz are:
4115 .vb
4116      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4117      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4118      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4119 .ve
4120    Here the space allocated is sum of all the above values i.e 34, and
4121    hence pre-allocation is perfect.
4122 
4123    Level: intermediate
4124 
4125 .keywords: matrix, aij, compressed row, sparse, parallel
4126 
4127 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4128           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4129 @*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* forward to the type-specific implementation registered on B as
     "MatMPIAIJSetPreallocation_C", if the matrix type provides one */
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4140 
4141 /*@
     MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain in standard
4143          CSR format the local rows.
4144 
4145    Collective on MPI_Comm
4146 
4147    Input Parameters:
4148 +  comm - MPI communicator
4149 .  m - number of local rows (Cannot be PETSC_DECIDE)
4150 .  n - This value should be the same as the local size used in creating the
4151        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4152        calculated if N is given) For square matrices n is almost always m.
4153 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4154 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4155 .   i - row indices
4156 .   j - column indices
4157 -   a - matrix values
4158 
4159    Output Parameter:
4160 .   mat - the matrix
4161 
4162    Level: intermediate
4163 
4164    Notes:
4165        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4166      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4167      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4168 
4169        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4170 
4171        The format which is used for the sparse matrix input, is equivalent to a
4172     row-major ordering.. i.e for the following matrix, the input data expected is
4173     as shown
4174 
4175 $        1 0 0
4176 $        2 0 3     P0
4177 $       -------
4178 $        4 5 6     P1
4179 $
4180 $     Process0 [P0]: rows_owned=[0,1]
4181 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4182 $        j =  {0,0,2}  [size = 3]
4183 $        v =  {1,2,3}  [size = 3]
4184 $
4185 $     Process1 [P1]: rows_owned=[2]
4186 $        i =  {0,3}    [size = nrow+1  = 1+1]
4187 $        j =  {0,1,2}  [size = 3]
4188 $        v =  {4,5,6}  [size = 3]
4189 
4190 .keywords: matrix, aij, compressed row, sparse, parallel
4191 
4192 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4193           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4194 @*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* CSR row offsets must start at 0, and the local row count must be given explicitly */
  if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  /* copies the CSR arrays into the matrix (see the Notes in the manual page above) */
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4209 
4210 /*@C
4211    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4212    (the default parallel PETSc format).  For good matrix assembly performance
4213    the user should preallocate the matrix storage by setting the parameters
4214    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4215    performance can be increased by more than a factor of 50.
4216 
4217    Collective on MPI_Comm
4218 
4219    Input Parameters:
4220 +  comm - MPI communicator
4221 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4222            This value should be the same as the local size used in creating the
4223            y vector for the matrix-vector product y = Ax.
4224 .  n - This value should be the same as the local size used in creating the
4225        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4226        calculated if N is given) For square matrices n is almost always m.
4227 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4228 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4229 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4230            (same value is used for all local rows)
4231 .  d_nnz - array containing the number of nonzeros in the various rows of the
4232            DIAGONAL portion of the local submatrix (possibly different for each row)
4233            or NULL, if d_nz is used to specify the nonzero structure.
4234            The size of this array is equal to the number of local rows, i.e 'm'.
4235 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4236            submatrix (same value is used for all local rows).
4237 -  o_nnz - array containing the number of nonzeros in the various rows of the
4238            OFF-DIAGONAL portion of the local submatrix (possibly different for
4239            each row) or NULL, if o_nz is used to specify the nonzero
4240            structure. The size of this array is equal to the number
4241            of local rows, i.e 'm'.
4242 
4243    Output Parameter:
4244 .  A - the matrix
4245 
4246    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
4248    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4249 
4250    Notes:
4251    If the *_nnz parameter is given then the *_nz parameter is ignored
4252 
4253    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4254    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4255    storage requirements for this matrix.
4256 
4257    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
4259    that argument.
4260 
4261    The user MUST specify either the local or global matrix dimensions
4262    (possibly both).
4263 
4264    The parallel matrix is partitioned across processors such that the
4265    first m0 rows belong to process 0, the next m1 rows belong to
4266    process 1, the next m2 rows belong to process 2 etc.. where
4267    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4268    values corresponding to [m x N] submatrix.
4269 
4270    The columns are logically partitioned with the n0 columns belonging
4271    to 0th partition, the next n1 columns belonging to the next
4272    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4273 
4274    The DIAGONAL portion of the local submatrix on any given processor
4275    is the submatrix corresponding to the rows and columns m,n
4276    corresponding to the given processor. i.e diagonal matrix on
4277    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4278    etc. The remaining portion of the local submatrix [m x (N-n)]
4279    constitute the OFF-DIAGONAL portion. The example below better
4280    illustrates this concept.
4281 
4282    For a square global matrix we define each processor's diagonal portion
4283    to be its local rows and the corresponding columns (a square submatrix);
4284    each processor's off-diagonal portion encompasses the remainder of the
4285    local matrix (a rectangular submatrix).
4286 
4287    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4288 
4289    When calling this routine with a single process communicator, a matrix of
4290    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4291    type of communicator, use the construction mechanism
4292 .vb
4293      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4294 .ve
4295 
4296 $     MatCreate(...,&A);
4297 $     MatSetType(A,MATMPIAIJ);
4298 $     MatSetSizes(A, m,n,M,N);
4299 $     MatMPIAIJSetPreallocation(A,...);
4300 
4301    By default, this format uses inodes (identical nodes) when possible.
4302    We search for consecutive rows with the same nonzero structure, thereby
4303    reusing matrix information to achieve increased efficiency.
4304 
4305    Options Database Keys:
4306 +  -mat_no_inode  - Do not use inodes
4307 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4308 
4309 
4310 
4311    Example usage:
4312 
4313    Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4315    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4316    as follows
4317 
4318 .vb
4319             1  2  0  |  0  3  0  |  0  4
4320     Proc0   0  5  6  |  7  0  0  |  8  0
4321             9  0 10  | 11  0  0  | 12  0
4322     -------------------------------------
4323            13  0 14  | 15 16 17  |  0  0
4324     Proc1   0 18  0  | 19 20 21  |  0  0
4325             0  0  0  | 22 23  0  | 24  0
4326     -------------------------------------
4327     Proc2  25 26 27  |  0  0 28  | 29  0
4328            30  0  0  | 31 32 33  |  0 34
4329 .ve
4330 
4331    This can be represented as a collection of submatrices as
4332 
4333 .vb
4334       A B C
4335       D E F
4336       G H I
4337 .ve
4338 
4339    Where the submatrices A,B,C are owned by proc0, D,E,F are
4340    owned by proc1, G,H,I are owned by proc2.
4341 
4342    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4343    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4344    The 'M','N' parameters are 8,8, and have the same values on all procs.
4345 
4346    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4347    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4348    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4349    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4350    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4352 
4353    When d_nz, o_nz parameters are specified, d_nz storage elements are
4354    allocated for every row of the local diagonal submatrix, and o_nz
4355    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
4357    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4358    In this case, the values of d_nz,o_nz are
4359 .vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
4363 .ve
4364    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4365    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e. we are using 12+15+10=37 storage locations to store
4367    34 values.
4368 
4369    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4371    In the above case the values for d_nnz,o_nnz are
4372 .vb
4373      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4374      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4375      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4376 .ve
4377    Here the space allocated is sum of all the above values i.e 34, and
4378    hence pre-allocation is perfect.
4379 
4380    Level: intermediate
4381 
4382 .keywords: matrix, aij, compressed row, sparse, parallel
4383 
4384 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4385           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4386 @*/
4387 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4388 {
4389   PetscErrorCode ierr;
4390   PetscMPIInt    size;
4391 
4392   PetscFunctionBegin;
4393   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4394   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4395   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4396   if (size > 1) {
4397     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4398     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4399   } else {
4400     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4401     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4402   }
4403   PetscFunctionReturn(0);
4404 }
4405 
4406 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4407 {
4408   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4409   PetscBool      flg;
4410   PetscErrorCode ierr;
4411 
4412   PetscFunctionBegin;
4413   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
4414   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4415   if (Ad)     *Ad     = a->A;
4416   if (Ao)     *Ao     = a->B;
4417   if (colmap) *colmap = a->garray;
4418   PetscFunctionReturn(0);
4419 }
4420 
/*
   MatCreateMPIMatConcatenateSeqMat_MPIAIJ - Concatenates the sequential matrices held by
   the processes of comm (stacked by rows, in rank order as established by the MPI_Scan
   below) into one parallel AIJ matrix.

   Input Parameters:
+  comm   - communicator the output matrix lives on
.  inmat  - this process's sequential matrix; its rows become this rank's row block
.  n      - number of local columns of the output (or PETSC_DECIDE)
-  scall  - MAT_INITIAL_MATRIX (create outmat: symbolic + numeric phase) or
            MAT_REUSE_MATRIX (outmat already exists: numeric phase only)

   Output Parameter:
.  outmat - the parallel matrix

   Collective: the Allreduce, MPI_Scan and preallocation reductions below must be executed
   by all ranks of comm in the same order.
*/
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    /* dnz/onz are allocated by MatPreallocateInitialize() and freed by MatPreallocateFinalize() */
    PetscInt       *dnz,*onz,sum,bs,cbs;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    /* first global row owned by this rank = sum of m over the lower ranks */
    ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= m;

    /* count diagonal/off-diagonal nonzeros of each local row for preallocation */
    ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
      ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
    }

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    /* MATAIJ resolves to SeqAIJ or MPIAIJ depending on the communicator size; both
       preallocation calls are made and the one matching the actual type takes effect */
    ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  }

  /* numeric phase: insert the rows of inmat into this rank's owned row block of outmat */
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4472 
4473 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4474 {
4475   PetscErrorCode    ierr;
4476   PetscMPIInt       rank;
4477   PetscInt          m,N,i,rstart,nnz;
4478   size_t            len;
4479   const PetscInt    *indx;
4480   PetscViewer       out;
4481   char              *name;
4482   Mat               B;
4483   const PetscScalar *values;
4484 
4485   PetscFunctionBegin;
4486   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4487   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4488   /* Should this be the type of the diagonal block of A? */
4489   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4490   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4491   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4492   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4493   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4494   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4495   for (i=0; i<m; i++) {
4496     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4497     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4498     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4499   }
4500   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4501   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4502 
4503   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4504   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4505   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4506   sprintf(name,"%s.%d",outfile,rank);
4507   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4508   ierr = PetscFree(name);CHKERRQ(ierr);
4509   ierr = MatView(B,out);CHKERRQ(ierr);
4510   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4511   ierr = MatDestroy(&B);CHKERRQ(ierr);
4512   PetscFunctionReturn(0);
4513 }
4514 
/*
   MatDestroy_MPIAIJ_SeqsToMPI - Destroy routine installed on matrices built by
   MatCreateMPIAIJSumSeqAIJSymbolic(): frees the Mat_Merge_SeqsToMPI support structure
   attached under the key "MatMergeSeqsToMPI" (if present), then delegates to the regular
   MPIAIJ destroy.
*/
PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
{
  PetscErrorCode      ierr;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
    /* free the communication pattern and merged-structure arrays stored by the symbolic phase */
    ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->bi);CHKERRQ(ierr);
    ierr = PetscFree(merge->bj);CHKERRQ(ierr);
    /* buf_ri/buf_rj hold one contiguous allocation whose start is recorded in slot 0 */
    ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
    ierr = PetscFree(merge->coi);CHKERRQ(ierr);
    ierr = PetscFree(merge->coj);CHKERRQ(ierr);
    ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
    ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
    ierr = PetscFree(merge);CHKERRQ(ierr);
    /* detach the (now freed) structure so no one can query it again */
    ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
  }
  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4544 
4545 #include <../src/mat/utils/freespace.h>
4546 #include <petscbt.h>
4547 
/*
   MatCreateMPIAIJSumSeqAIJNumeric - Numeric phase of MatCreateMPIAIJSumSeqAIJ(): using the
   communication pattern and merged row structure computed by the symbolic phase (queried
   from the "MatMergeSeqsToMPI" container attached to mpimat), each process sends the
   values of its off-process rows of seqmat to the owning ranks and sums its own local
   values with the received contributions into the assembled parallel matrix mpimat.
*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode      ierr;
  MPI_Comm            comm;
  Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  MatScalar           *aa=a->a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  /* retrieve the support structure built by MatCreateMPIAIJSumSeqAIJSymbolic() */
  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* send the values of all rows owned by [proc], which are contiguous in aa */
    i    = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    /* merge: bj_i is a superset of aj, both sorted, so scan bj_i once */
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4664 
/*
   MatCreateMPIAIJSumSeqAIJSymbolic - Symbolic phase of MatCreateMPIAIJSumSeqAIJ(): each
   process sends the ij-structure of the rows of its sequential matrix owned by other
   ranks, merges the received structures with its own (via a sorted linked list) to form
   the union nonzero pattern, and creates a preallocated but NOT yet assembled parallel
   matrix *mpimat.  The supporting Mat_Merge_SeqsToMPI structure (merged row structure
   bi/bj plus the communication buffers) is attached to *mpimat in a PetscContainer under
   the key "MatMergeSeqsToMPI" for use by MatCreateMPIAIJSumSeqAIJNumeric().

   Input Parameters:
+  comm   - communicator the parallel matrix lives on
.  seqmat - this process's sequential matrix (all ranks' seqmats have the same dimensions)
.  m      - number of local rows (or PETSC_DECIDE)
-  n      - number of local columns (or PETSC_DECIDE)

   Output Parameter:
.  mpimat - the symbolic parallel matrix (assembled flag left PETSC_FALSE)
*/
PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode      ierr;
  Mat                 B_mpi;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt            len,proc,*dnz,*onz,bs,cbs;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space=NULL,current_space=NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);

  /* make sure it is a PETSc comm */
  ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscNew(&merge);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);

  /* determine row ownership */
  /*---------------------------------------------------------*/
  ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
  ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);

  m      = merge->rowmap->n;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len          = 0; /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++) {
    len_si[proc] = 0;
    if (proc == rank) {
      /* rows this rank owns are processed locally, not sent */
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      /* count only the nonempty rows; the i-structure message skips empty rows */
      for (i=owners[proc]; i<owners[proc+1]; i++) {
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len         += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*--------------------------------*/
  ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);

  for (proc=0, k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++) {
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc]; /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}

  ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++) {
    ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);

  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow  = owners[rank] + i;
    anzi  = ai[arow+1] - ai[arow];
    aj    = a->j + ai[arow];
    ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi  = *(nextai[k]+1) - *nextai[k];
        aj    = buf_rj[k] + *nextai[k];
        ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  /* copy the accumulated column indices into the final contiguous bj array */
  ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = NULL;
  merge->coj          = NULL;
  merge->owners_co    = NULL;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  /* attach the supporting struct to B_mpi for reuse */
  ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4915 
4916 /*@C
4917       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4918                  matrices from each processor
4919 
4920     Collective on MPI_Comm
4921 
4922    Input Parameters:
+    comm - the communicator the parallel matrix will live on
4924 .    seqmat - the input sequential matrices
4925 .    m - number of local rows (or PETSC_DECIDE)
4926 .    n - number of local columns (or PETSC_DECIDE)
4927 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4928 
4929    Output Parameter:
4930 .    mpimat - the parallel matrix generated
4931 
4932     Level: advanced
4933 
4934    Notes:
4935      The dimensions of the sequential matrix in each processor MUST be the same.
4936      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4937      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4938 @*/
4939 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4940 {
4941   PetscErrorCode ierr;
4942   PetscMPIInt    size;
4943 
4944   PetscFunctionBegin;
4945   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4946   if (size == 1) {
4947     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4948     if (scall == MAT_INITIAL_MATRIX) {
4949       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4950     } else {
4951       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4952     }
4953     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4954     PetscFunctionReturn(0);
4955   }
4956   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4957   if (scall == MAT_INITIAL_MATRIX) {
4958     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4959   }
4960   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4961   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4962   PetscFunctionReturn(0);
4963 }
4964 
4965 /*@
4966      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4967           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4968           with MatGetSize()
4969 
4970     Not Collective
4971 
4972    Input Parameters:
4973 +    A - the matrix
4974 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4975 
4976    Output Parameter:
4977 .    A_loc - the local sequential matrix generated
4978 
4979     Level: developer
4980 
4981 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
4982 
4983 @*/
4984 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4985 {
4986   PetscErrorCode ierr;
4987   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4988   Mat_SeqAIJ     *mat,*a,*b;
4989   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4990   MatScalar      *aa,*ba,*cam;
4991   PetscScalar    *ca;
4992   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4993   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4994   PetscBool      match;
4995   MPI_Comm       comm;
4996   PetscMPIInt    size;
4997 
4998   PetscFunctionBegin;
4999   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5000   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5001   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5002   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5003   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
5004 
5005   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5006   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5007   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5008   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5009   aa = a->a; ba = b->a;
5010   if (scall == MAT_INITIAL_MATRIX) {
5011     if (size == 1) {
5012       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
5013       PetscFunctionReturn(0);
5014     }
5015 
5016     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5017     ci[0] = 0;
5018     for (i=0; i<am; i++) {
5019       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5020     }
5021     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5022     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5023     k    = 0;
5024     for (i=0; i<am; i++) {
5025       ncols_o = bi[i+1] - bi[i];
5026       ncols_d = ai[i+1] - ai[i];
5027       /* off-diagonal portion of A */
5028       for (jo=0; jo<ncols_o; jo++) {
5029         col = cmap[*bj];
5030         if (col >= cstart) break;
5031         cj[k]   = col; bj++;
5032         ca[k++] = *ba++;
5033       }
5034       /* diagonal portion of A */
5035       for (j=0; j<ncols_d; j++) {
5036         cj[k]   = cstart + *aj++;
5037         ca[k++] = *aa++;
5038       }
5039       /* off-diagonal portion of A */
5040       for (j=jo; j<ncols_o; j++) {
5041         cj[k]   = cmap[*bj++];
5042         ca[k++] = *ba++;
5043       }
5044     }
5045     /* put together the new matrix */
5046     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5047     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5048     /* Since these are PETSc arrays, change flags to free them as necessary. */
5049     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5050     mat->free_a  = PETSC_TRUE;
5051     mat->free_ij = PETSC_TRUE;
5052     mat->nonew   = 0;
5053   } else if (scall == MAT_REUSE_MATRIX) {
5054     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5055     ci = mat->i; cj = mat->j; cam = mat->a;
5056     for (i=0; i<am; i++) {
5057       /* off-diagonal portion of A */
5058       ncols_o = bi[i+1] - bi[i];
5059       for (jo=0; jo<ncols_o; jo++) {
5060         col = cmap[*bj];
5061         if (col >= cstart) break;
5062         *cam++ = *ba++; bj++;
5063       }
5064       /* diagonal portion of A */
5065       ncols_d = ai[i+1] - ai[i];
5066       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5067       /* off-diagonal portion of A */
5068       for (j=jo; j<ncols_o; j++) {
5069         *cam++ = *ba++; bj++;
5070       }
5071     }
5072   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5073   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5074   PetscFunctionReturn(0);
5075 }
5076 
5077 /*@C
5078      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5079 
5080     Not Collective
5081 
5082    Input Parameters:
5083 +    A - the matrix
5084 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5085 -    row, col - index sets of rows and columns to extract (or NULL)
5086 
5087    Output Parameter:
5088 .    A_loc - the local sequential matrix generated
5089 
5090     Level: developer
5091 
5092 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5093 
5094 @*/
5095 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5096 {
5097   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5098   PetscErrorCode ierr;
5099   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5100   IS             isrowa,iscola;
5101   Mat            *aloc;
5102   PetscBool      match;
5103 
5104   PetscFunctionBegin;
5105   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5106   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5107   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5108   if (!row) {
5109     start = A->rmap->rstart; end = A->rmap->rend;
5110     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5111   } else {
5112     isrowa = *row;
5113   }
5114   if (!col) {
5115     start = A->cmap->rstart;
5116     cmap  = a->garray;
5117     nzA   = a->A->cmap->n;
5118     nzB   = a->B->cmap->n;
5119     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5120     ncols = 0;
5121     for (i=0; i<nzB; i++) {
5122       if (cmap[i] < start) idx[ncols++] = cmap[i];
5123       else break;
5124     }
5125     imark = i;
5126     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5127     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5128     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5129   } else {
5130     iscola = *col;
5131   }
5132   if (scall != MAT_INITIAL_MATRIX) {
5133     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5134     aloc[0] = *A_loc;
5135   }
5136   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5137   if (!col) { /* attach global id of condensed columns */
5138     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5139   }
5140   *A_loc = aloc[0];
5141   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5142   if (!row) {
5143     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5144   }
5145   if (!col) {
5146     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5147   }
5148   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5149   PetscFunctionReturn(0);
5150 }
5151 
5152 /*@C
5153     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5154 
5155     Collective on Mat
5156 
5157    Input Parameters:
5158 +    A,B - the matrices in mpiaij format
5159 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5160 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5161 
5162    Output Parameter:
5163 +    rowb, colb - index sets of rows and columns of B to extract
5164 -    B_seq - the sequential matrix generated
5165 
5166     Level: developer
5167 
5168 @*/
5169 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5170 {
5171   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5172   PetscErrorCode ierr;
5173   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5174   IS             isrowb,iscolb;
5175   Mat            *bseq=NULL;
5176 
5177   PetscFunctionBegin;
5178   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5179     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5180   }
5181   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5182 
5183   if (scall == MAT_INITIAL_MATRIX) {
5184     start = A->cmap->rstart;
5185     cmap  = a->garray;
5186     nzA   = a->A->cmap->n;
5187     nzB   = a->B->cmap->n;
5188     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5189     ncols = 0;
5190     for (i=0; i<nzB; i++) {  /* row < local row index */
5191       if (cmap[i] < start) idx[ncols++] = cmap[i];
5192       else break;
5193     }
5194     imark = i;
5195     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5196     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5197     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5198     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5199   } else {
5200     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5201     isrowb  = *rowb; iscolb = *colb;
5202     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5203     bseq[0] = *B_seq;
5204   }
5205   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5206   *B_seq = bseq[0];
5207   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5208   if (!rowb) {
5209     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5210   } else {
5211     *rowb = isrowb;
5212   }
5213   if (!colb) {
5214     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5215   } else {
5216     *colb = iscolb;
5217   }
5218   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5219   PetscFunctionReturn(0);
5220 }
5221 
5222 /*
5223     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5224     of the OFF-DIAGONAL portion of local A
5225 
5226     Collective on Mat
5227 
5228    Input Parameters:
5229 +    A,B - the matrices in mpiaij format
5230 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5231 
5232    Output Parameter:
5233 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5234 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5235 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5236 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5237 
5238     Level: developer
5239 
5240 */
5241 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5242 {
5243   VecScatter_MPI_General *gen_to,*gen_from;
5244   PetscErrorCode         ierr;
5245   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5246   Mat_SeqAIJ             *b_oth;
5247   VecScatter             ctx;
5248   MPI_Comm               comm;
5249   PetscMPIInt            *rprocs,*sprocs,tag,rank;
5250   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5251   PetscInt               *rvalues,*svalues,*cols,sbs,rbs;
5252   PetscScalar              *b_otha,*bufa,*bufA,*vals;
5253   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5254   MPI_Request            *rwaits = NULL,*swaits = NULL;
5255   MPI_Status             *sstatus,rstatus;
5256   PetscMPIInt            jj,size;
5257   VecScatterType         type;
5258   PetscBool              mpi1;
5259 
5260   PetscFunctionBegin;
5261   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5262   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5263 
5264   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5265     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5266   }
5267   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5268   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5269 
5270   if (size == 1) {
5271     startsj_s = NULL;
5272     bufa_ptr  = NULL;
5273     *B_oth    = NULL;
5274     PetscFunctionReturn(0);
5275   }
5276 
5277   ctx = a->Mvctx;
5278   ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr);
5279   ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr);
5280   if (!mpi1) {
5281     /* a->Mvctx is not type MPI1 which is not implemented for Mat-Mat ops,
5282      thus create a->Mvctx_mpi1 */
5283     if (!a->Mvctx_mpi1) {
5284       a->Mvctx_mpi1_flg = PETSC_TRUE;
5285       ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr);
5286     }
5287     ctx = a->Mvctx_mpi1;
5288   }
5289   tag = ((PetscObject)ctx)->tag;
5290 
5291   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5292   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5293   nrecvs   = gen_from->n;
5294   nsends   = gen_to->n;
5295 
5296   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5297   srow    = gen_to->indices;    /* local row index to be sent */
5298   sstarts = gen_to->starts;
5299   sprocs  = gen_to->procs;
5300   sstatus = gen_to->sstatus;
5301   sbs     = gen_to->bs;
5302   rstarts = gen_from->starts;
5303   rprocs  = gen_from->procs;
5304   rbs     = gen_from->bs;
5305 
5306   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5307   if (scall == MAT_INITIAL_MATRIX) {
5308     /* i-array */
5309     /*---------*/
5310     /*  post receives */
5311     ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
5312     for (i=0; i<nrecvs; i++) {
5313       rowlen = rvalues + rstarts[i]*rbs;
5314       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5315       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5316     }
5317 
5318     /* pack the outgoing message */
5319     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5320 
5321     sstartsj[0] = 0;
5322     rstartsj[0] = 0;
5323     len         = 0; /* total length of j or a array to be sent */
5324     k           = 0;
5325     ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
5326     for (i=0; i<nsends; i++) {
5327       rowlen = svalues + sstarts[i]*sbs;
5328       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5329       for (j=0; j<nrows; j++) {
5330         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5331         for (l=0; l<sbs; l++) {
5332           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5333 
5334           rowlen[j*sbs+l] = ncols;
5335 
5336           len += ncols;
5337           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5338         }
5339         k++;
5340       }
5341       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5342 
5343       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5344     }
5345     /* recvs and sends of i-array are completed */
5346     i = nrecvs;
5347     while (i--) {
5348       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5349     }
5350     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5351     ierr = PetscFree(svalues);CHKERRQ(ierr);
5352 
5353     /* allocate buffers for sending j and a arrays */
5354     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5355     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5356 
5357     /* create i-array of B_oth */
5358     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5359 
5360     b_othi[0] = 0;
5361     len       = 0; /* total length of j or a array to be received */
5362     k         = 0;
5363     for (i=0; i<nrecvs; i++) {
5364       rowlen = rvalues + rstarts[i]*rbs;
5365       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5366       for (j=0; j<nrows; j++) {
5367         b_othi[k+1] = b_othi[k] + rowlen[j];
5368         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5369         k++;
5370       }
5371       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5372     }
5373     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5374 
5375     /* allocate space for j and a arrrays of B_oth */
5376     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5377     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5378 
5379     /* j-array */
5380     /*---------*/
5381     /*  post receives of j-array */
5382     for (i=0; i<nrecvs; i++) {
5383       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5384       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5385     }
5386 
5387     /* pack the outgoing message j-array */
5388     k = 0;
5389     for (i=0; i<nsends; i++) {
5390       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5391       bufJ  = bufj+sstartsj[i];
5392       for (j=0; j<nrows; j++) {
5393         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5394         for (ll=0; ll<sbs; ll++) {
5395           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5396           for (l=0; l<ncols; l++) {
5397             *bufJ++ = cols[l];
5398           }
5399           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5400         }
5401       }
5402       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5403     }
5404 
5405     /* recvs and sends of j-array are completed */
5406     i = nrecvs;
5407     while (i--) {
5408       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5409     }
5410     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5411   } else if (scall == MAT_REUSE_MATRIX) {
5412     sstartsj = *startsj_s;
5413     rstartsj = *startsj_r;
5414     bufa     = *bufa_ptr;
5415     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5416     b_otha   = b_oth->a;
5417   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5418 
5419   /* a-array */
5420   /*---------*/
5421   /*  post receives of a-array */
5422   for (i=0; i<nrecvs; i++) {
5423     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5424     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5425   }
5426 
5427   /* pack the outgoing message a-array */
5428   k = 0;
5429   for (i=0; i<nsends; i++) {
5430     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5431     bufA  = bufa+sstartsj[i];
5432     for (j=0; j<nrows; j++) {
5433       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5434       for (ll=0; ll<sbs; ll++) {
5435         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5436         for (l=0; l<ncols; l++) {
5437           *bufA++ = vals[l];
5438         }
5439         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5440       }
5441     }
5442     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5443   }
5444   /* recvs and sends of a-array are completed */
5445   i = nrecvs;
5446   while (i--) {
5447     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5448   }
5449   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5450   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5451 
5452   if (scall == MAT_INITIAL_MATRIX) {
5453     /* put together the new matrix */
5454     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5455 
5456     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5457     /* Since these are PETSc arrays, change flags to free them as necessary. */
5458     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5459     b_oth->free_a  = PETSC_TRUE;
5460     b_oth->free_ij = PETSC_TRUE;
5461     b_oth->nonew   = 0;
5462 
5463     ierr = PetscFree(bufj);CHKERRQ(ierr);
5464     if (!startsj_s || !bufa_ptr) {
5465       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5466       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5467     } else {
5468       *startsj_s = sstartsj;
5469       *startsj_r = rstartsj;
5470       *bufa_ptr  = bufa;
5471     }
5472   }
5473   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5474   PetscFunctionReturn(0);
5475 }
5476 
5477 /*@C
5478   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5479 
5480   Not Collective
5481 
5482   Input Parameters:
5483 . A - The matrix in mpiaij format
5484 
5485   Output Parameter:
5486 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5487 . colmap - A map from global column index to local index into lvec
5488 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5489 
5490   Level: developer
5491 
5492 @*/
5493 #if defined(PETSC_USE_CTABLE)
5494 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5495 #else
5496 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5497 #endif
5498 {
5499   Mat_MPIAIJ *a;
5500 
5501   PetscFunctionBegin;
5502   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5503   PetscValidPointer(lvec, 2);
5504   PetscValidPointer(colmap, 3);
5505   PetscValidPointer(multScatter, 4);
5506   a = (Mat_MPIAIJ*) A->data;
5507   if (lvec) *lvec = a->lvec;
5508   if (colmap) *colmap = a->colmap;
5509   if (multScatter) *multScatter = a->Mvctx;
5510   PetscFunctionReturn(0);
5511 }
5512 
5513 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5514 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5515 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5516 #if defined(PETSC_HAVE_MKL_SPARSE)
5517 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5518 #endif
5519 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5520 #if defined(PETSC_HAVE_ELEMENTAL)
5521 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5522 #endif
5523 #if defined(PETSC_HAVE_HYPRE)
5524 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5525 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5526 #endif
5527 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5528 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5529 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5530 
5531 /*
5532     Computes (B'*A')' since computing B*A directly is untenable
5533 
5534                n                       p                          p
5535         (              )       (              )         (                  )
5536       m (      A       )  *  n (       B      )   =   m (         C        )
5537         (              )       (              )         (                  )
5538 
5539 */
5540 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5541 {
5542   PetscErrorCode ierr;
5543   Mat            At,Bt,Ct;
5544 
5545   PetscFunctionBegin;
5546   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5547   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5548   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5549   ierr = MatDestroy(&At);CHKERRQ(ierr);
5550   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5551   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5552   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5553   PetscFunctionReturn(0);
5554 }
5555 
5556 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5557 {
5558   PetscErrorCode ierr;
5559   PetscInt       m=A->rmap->n,n=B->cmap->n;
5560   Mat            Cmat;
5561 
5562   PetscFunctionBegin;
5563   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5564   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5565   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5566   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5567   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5568   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5569   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5570   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5571 
5572   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5573 
5574   *C = Cmat;
5575   PetscFunctionReturn(0);
5576 }
5577 
/* ----------------------------------------------------------------*/
/*
   MatMatMult_MPIDense_MPIAIJ - Driver for C = A*B with MPIDense A and MPIAIJ B:
   runs the symbolic phase only on the first call (MAT_INITIAL_MATRIX), then
   always runs the numeric phase on the (new or reused) C.
*/
PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
5594 
5595 /*MC
5596    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5597 
5598    Options Database Keys:
5599 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5600 
5601   Level: beginner
5602 
5603 .seealso: MatCreateAIJ()
5604 M*/
5605 
/*
   MatCreate_MPIAIJ - Constructor registered for type MATMPIAIJ: allocates the
   Mat_MPIAIJ data structure, installs the MPIAIJ function table, and registers
   the type-specific operations (preallocation, conversions, products) via
   PetscObjectComposeFunction() so they can be looked up by name.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);

  ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data       = (void*)b;
  /* copy the whole MPIAIJ operation table in one shot */
  ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  /* lazily-created structures start out empty */
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSP/CUSPARSE classes */
  b->spptr = NULL;

  /* register type-specific operations under their query names */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  /* conversions to related formats; optional ones are gated on build configuration */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_MKL_SPARSE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
  /* matrix-product kernels involving this type */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
5677 
5678 /*@C
5679      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5680          and "off-diagonal" part of the matrix in CSR format.
5681 
5682    Collective on MPI_Comm
5683 
   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.   i - row indices for the "diagonal" portion of the matrix
.   j - column indices
.   a - matrix values
.   oi - row indices for the "off-diagonal" portion of the matrix
.   oj - column indices
-   oa - matrix values
5698 
5699    Output Parameter:
5700 .   mat - the matrix
5701 
5702    Level: advanced
5703 
5704    Notes:
5705        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5706        must free the arrays once the matrix has been destroyed and not before.
5707 
5708        The i and j indices are 0 based
5709 
5710        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5711 
5712        This sets local rows and cannot be used to set off-processor values.
5713 
5714        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5715        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5716        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5717        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5718        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5719        communication if it is known that only local entries will be set.
5720 
5721 .keywords: matrix, aij, compressed row, sparse, parallel
5722 
5723 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5724           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5725 @*/
5726 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5727 {
5728   PetscErrorCode ierr;
5729   Mat_MPIAIJ     *maij;
5730 
5731   PetscFunctionBegin;
5732   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5733   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5734   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5735   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5736   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5737   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5738   maij = (Mat_MPIAIJ*) (*mat)->data;
5739 
5740   (*mat)->preallocated = PETSC_TRUE;
5741 
5742   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5743   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5744 
5745   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5746   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5747 
5748   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5749   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5750   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5751   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5752 
5753   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5754   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5755   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5756   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5757   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5758   PetscFunctionReturn(0);
5759 }
5760 
5761 /*
5762     Special version for direct calls from Fortran
5763 */
5764 #include <petsc/private/fortranimpl.h>
5765 
/* Redefine these macros so that they can be used in a void function */
5767 #undef CHKERRQ
5768 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5769 #undef SETERRQ2
5770 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5771 #undef SETERRQ3
5772 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5773 #undef SETERRQ
5774 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5775 
5776 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5777 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5778 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5779 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5780 #else
5781 #endif
5782 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5783 {
5784   Mat            mat  = *mmat;
5785   PetscInt       m    = *mm, n = *mn;
5786   InsertMode     addv = *maddv;
5787   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5788   PetscScalar    value;
5789   PetscErrorCode ierr;
5790 
5791   MatCheckPreallocated(mat,1);
5792   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5793 
5794 #if defined(PETSC_USE_DEBUG)
5795   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5796 #endif
5797   {
5798     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5799     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5800     PetscBool roworiented = aij->roworiented;
5801 
5802     /* Some Variables required in the macro */
5803     Mat        A                 = aij->A;
5804     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5805     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5806     MatScalar  *aa               = a->a;
5807     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5808     Mat        B                 = aij->B;
5809     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5810     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5811     MatScalar  *ba               = b->a;
5812 
5813     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5814     PetscInt  nonew = a->nonew;
5815     MatScalar *ap1,*ap2;
5816 
5817     PetscFunctionBegin;
5818     for (i=0; i<m; i++) {
5819       if (im[i] < 0) continue;
5820 #if defined(PETSC_USE_DEBUG)
5821       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5822 #endif
5823       if (im[i] >= rstart && im[i] < rend) {
5824         row      = im[i] - rstart;
5825         lastcol1 = -1;
5826         rp1      = aj + ai[row];
5827         ap1      = aa + ai[row];
5828         rmax1    = aimax[row];
5829         nrow1    = ailen[row];
5830         low1     = 0;
5831         high1    = nrow1;
5832         lastcol2 = -1;
5833         rp2      = bj + bi[row];
5834         ap2      = ba + bi[row];
5835         rmax2    = bimax[row];
5836         nrow2    = bilen[row];
5837         low2     = 0;
5838         high2    = nrow2;
5839 
5840         for (j=0; j<n; j++) {
5841           if (roworiented) value = v[i*n+j];
5842           else value = v[i+j*m];
5843           if (in[j] >= cstart && in[j] < cend) {
5844             col = in[j] - cstart;
5845             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5846             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5847           } else if (in[j] < 0) continue;
5848 #if defined(PETSC_USE_DEBUG)
5849           /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
5850           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5851 #endif
5852           else {
5853             if (mat->was_assembled) {
5854               if (!aij->colmap) {
5855                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5856               }
5857 #if defined(PETSC_USE_CTABLE)
5858               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5859               col--;
5860 #else
5861               col = aij->colmap[in[j]] - 1;
5862 #endif
5863               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5864               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5865                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5866                 col  =  in[j];
5867                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5868                 B     = aij->B;
5869                 b     = (Mat_SeqAIJ*)B->data;
5870                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5871                 rp2   = bj + bi[row];
5872                 ap2   = ba + bi[row];
5873                 rmax2 = bimax[row];
5874                 nrow2 = bilen[row];
5875                 low2  = 0;
5876                 high2 = nrow2;
5877                 bm    = aij->B->rmap->n;
5878                 ba    = b->a;
5879               }
5880             } else col = in[j];
5881             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5882           }
5883         }
5884       } else if (!aij->donotstash) {
5885         if (roworiented) {
5886           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5887         } else {
5888           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5889         }
5890       }
5891     }
5892   }
5893   PetscFunctionReturnVoid();
5894 }
5895 
5896