xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 487a658c8b32ba712a1dc8280daad2fd70c1dcd9)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/isimpl.h>
6 #include <petscblaslapack.h>
7 #include <petscsf.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when
23    enough exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46 
47 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
48 {
49   PetscErrorCode ierr;
50   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
51 
52   PetscFunctionBegin;
53   if (mat->A) {
54     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
55     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
56   }
57   PetscFunctionReturn(0);
58 }
59 
60 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
61 {
62   PetscErrorCode  ierr;
63   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
64   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
65   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
66   const PetscInt  *ia,*ib;
67   const MatScalar *aa,*bb;
68   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
69   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
70 
71   PetscFunctionBegin;
72   *keptrows = 0;
73   ia        = a->i;
74   ib        = b->i;
75   for (i=0; i<m; i++) {
76     na = ia[i+1] - ia[i];
77     nb = ib[i+1] - ib[i];
78     if (!na && !nb) {
79       cnt++;
80       goto ok1;
81     }
82     aa = a->a + ia[i];
83     for (j=0; j<na; j++) {
84       if (aa[j] != 0.0) goto ok1;
85     }
86     bb = b->a + ib[i];
87     for (j=0; j <nb; j++) {
88       if (bb[j] != 0.0) goto ok1;
89     }
90     cnt++;
91 ok1:;
92   }
93   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
94   if (!n0rows) PetscFunctionReturn(0);
95   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
96   cnt  = 0;
97   for (i=0; i<m; i++) {
98     na = ia[i+1] - ia[i];
99     nb = ib[i+1] - ib[i];
100     if (!na && !nb) continue;
101     aa = a->a + ia[i];
102     for (j=0; j<na;j++) {
103       if (aa[j] != 0.0) {
104         rows[cnt++] = rstart + i;
105         goto ok2;
106       }
107     }
108     bb = b->a + ib[i];
109     for (j=0; j<nb; j++) {
110       if (bb[j] != 0.0) {
111         rows[cnt++] = rstart + i;
112         goto ok2;
113       }
114     }
115 ok2:;
116   }
117   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
118   PetscFunctionReturn(0);
119 }
120 
121 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
122 {
123   PetscErrorCode    ierr;
124   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
125   PetscBool         cong;
126 
127   PetscFunctionBegin;
128   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
129   if (Y->assembled && cong) {
130     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
131   } else {
132     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
133   }
134   PetscFunctionReturn(0);
135 }
136 
137 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
138 {
139   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
140   PetscErrorCode ierr;
141   PetscInt       i,rstart,nrows,*rows;
142 
143   PetscFunctionBegin;
144   *zrows = NULL;
145   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
146   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
147   for (i=0; i<nrows; i++) rows[i] += rstart;
148   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
149   PetscFunctionReturn(0);
150 }
151 
152 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
153 {
154   PetscErrorCode ierr;
155   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
156   PetscInt       i,n,*garray = aij->garray;
157   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
158   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
159   PetscReal      *work;
160 
161   PetscFunctionBegin;
162   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
163   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
164   if (type == NORM_2) {
165     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
166       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
167     }
168     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
169       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
170     }
171   } else if (type == NORM_1) {
172     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
173       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
174     }
175     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
176       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
177     }
178   } else if (type == NORM_INFINITY) {
179     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
180       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
181     }
182     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
183       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
184     }
185 
186   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
187   if (type == NORM_INFINITY) {
188     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
189   } else {
190     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
191   }
192   ierr = PetscFree(work);CHKERRQ(ierr);
193   if (type == NORM_2) {
194     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
195   }
196   PetscFunctionReturn(0);
197 }
198 
199 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
200 {
201   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
202   IS              sis,gis;
203   PetscErrorCode  ierr;
204   const PetscInt  *isis,*igis;
205   PetscInt        n,*iis,nsis,ngis,rstart,i;
206 
207   PetscFunctionBegin;
208   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
209   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
210   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
211   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
212   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
213   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
214 
215   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
216   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
217   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
218   n    = ngis + nsis;
219   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
220   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
221   for (i=0; i<n; i++) iis[i] += rstart;
222   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
223 
224   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
225   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
226   ierr = ISDestroy(&sis);CHKERRQ(ierr);
227   ierr = ISDestroy(&gis);CHKERRQ(ierr);
228   PetscFunctionReturn(0);
229 }
230 
231 /*
232     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
233     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
234 
235     Only for square matrices
236 
237     Used by a preconditioner, hence PETSC_EXTERN
238 */
239 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
240 {
241   PetscMPIInt    rank,size;
242   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
243   PetscErrorCode ierr;
244   Mat            mat;
245   Mat_SeqAIJ     *gmata;
246   PetscMPIInt    tag;
247   MPI_Status     status;
248   PetscBool      aij;
249   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
250 
251   PetscFunctionBegin;
252   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
253   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
254   if (!rank) {
255     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
256     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
257   }
258   if (reuse == MAT_INITIAL_MATRIX) {
259     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
260     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
261     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
262     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
263     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
264     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
265     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
266     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
267     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
268 
269     rowners[0] = 0;
270     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
271     rstart = rowners[rank];
272     rend   = rowners[rank+1];
273     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
274     if (!rank) {
275       gmata = (Mat_SeqAIJ*) gmat->data;
276       /* send row lengths to all processors */
277       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
278       for (i=1; i<size; i++) {
279         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
280       }
281       /* determine number diagonal and off-diagonal counts */
282       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
283       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
284       jj   = 0;
285       for (i=0; i<m; i++) {
286         for (j=0; j<dlens[i]; j++) {
287           if (gmata->j[jj] < rstart) ld[i]++;
288           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
289           jj++;
290         }
291       }
292       /* send column indices to other processes */
293       for (i=1; i<size; i++) {
294         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
295         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
296         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
297       }
298 
299       /* send numerical values to other processes */
300       for (i=1; i<size; i++) {
301         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
302         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
303       }
304       gmataa = gmata->a;
305       gmataj = gmata->j;
306 
307     } else {
308       /* receive row lengths */
309       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
310       /* receive column indices */
311       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
312       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
313       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
314       /* determine number diagonal and off-diagonal counts */
315       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
316       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
317       jj   = 0;
318       for (i=0; i<m; i++) {
319         for (j=0; j<dlens[i]; j++) {
320           if (gmataj[jj] < rstart) ld[i]++;
321           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
322           jj++;
323         }
324       }
325       /* receive numerical values */
326       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
327       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
328     }
329     /* set preallocation */
330     for (i=0; i<m; i++) {
331       dlens[i] -= olens[i];
332     }
333     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
334     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
335 
336     for (i=0; i<m; i++) {
337       dlens[i] += olens[i];
338     }
339     cnt = 0;
340     for (i=0; i<m; i++) {
341       row  = rstart + i;
342       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
343       cnt += dlens[i];
344     }
345     if (rank) {
346       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
347     }
348     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
349     ierr = PetscFree(rowners);CHKERRQ(ierr);
350 
351     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
352 
353     *inmat = mat;
354   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
355     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
356     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
357     mat  = *inmat;
358     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
359     if (!rank) {
360       /* send numerical values to other processes */
361       gmata  = (Mat_SeqAIJ*) gmat->data;
362       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
363       gmataa = gmata->a;
364       for (i=1; i<size; i++) {
365         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
366         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
367       }
368       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
369     } else {
370       /* receive numerical values from process 0*/
371       nz   = Ad->nz + Ao->nz;
372       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
373       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
374     }
375     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
376     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
377     ad = Ad->a;
378     ao = Ao->a;
379     if (mat->rmap->n) {
380       i  = 0;
381       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
382       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
383     }
384     for (i=1; i<mat->rmap->n; i++) {
385       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
386       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
387     }
388     i--;
389     if (mat->rmap->n) {
390       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
391     }
392     if (rank) {
393       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
394     }
395   }
396   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
397   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
398   PetscFunctionReturn(0);
399 }
400 
401 /*
402   Local utility routine that creates a mapping from the global column
403 number to the local number in the off-diagonal part of the local
404 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
405 a slightly higher hash table cost; without it it is not scalable (each processor
406 has an order N integer array but is fast to acess.
407 */
408 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
409 {
410   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
411   PetscErrorCode ierr;
412   PetscInt       n = aij->B->cmap->n,i;
413 
414   PetscFunctionBegin;
415   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
416 #if defined(PETSC_USE_CTABLE)
417   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
418   for (i=0; i<n; i++) {
419     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
420   }
421 #else
422   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
423   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
424   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
425 #endif
426   PetscFunctionReturn(0);
427 }
428 
429 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
430 { \
431     if (col <= lastcol1)  low1 = 0;     \
432     else                 high1 = nrow1; \
433     lastcol1 = col;\
434     while (high1-low1 > 5) { \
435       t = (low1+high1)/2; \
436       if (rp1[t] > col) high1 = t; \
437       else              low1  = t; \
438     } \
439       for (_i=low1; _i<high1; _i++) { \
440         if (rp1[_i] > col) break; \
441         if (rp1[_i] == col) { \
442           if (addv == ADD_VALUES) ap1[_i] += value;   \
443           else                    ap1[_i] = value; \
444           goto a_noinsert; \
445         } \
446       }  \
447       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
448       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
449       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
450       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
451       N = nrow1++ - 1; a->nz++; high1++; \
452       /* shift up all the later entries in this row */ \
453       for (ii=N; ii>=_i; ii--) { \
454         rp1[ii+1] = rp1[ii]; \
455         ap1[ii+1] = ap1[ii]; \
456       } \
457       rp1[_i] = col;  \
458       ap1[_i] = value;  \
459       A->nonzerostate++;\
460       a_noinsert: ; \
461       ailen[row] = nrow1; \
462 }
463 
464 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
465   { \
466     if (col <= lastcol2) low2 = 0;                        \
467     else high2 = nrow2;                                   \
468     lastcol2 = col;                                       \
469     while (high2-low2 > 5) {                              \
470       t = (low2+high2)/2;                                 \
471       if (rp2[t] > col) high2 = t;                        \
472       else             low2  = t;                         \
473     }                                                     \
474     for (_i=low2; _i<high2; _i++) {                       \
475       if (rp2[_i] > col) break;                           \
476       if (rp2[_i] == col) {                               \
477         if (addv == ADD_VALUES) ap2[_i] += value;         \
478         else                    ap2[_i] = value;          \
479         goto b_noinsert;                                  \
480       }                                                   \
481     }                                                     \
482     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
483     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
484     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
485     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
486     N = nrow2++ - 1; b->nz++; high2++;                    \
487     /* shift up all the later entries in this row */      \
488     for (ii=N; ii>=_i; ii--) {                            \
489       rp2[ii+1] = rp2[ii];                                \
490       ap2[ii+1] = ap2[ii];                                \
491     }                                                     \
492     rp2[_i] = col;                                        \
493     ap2[_i] = value;                                      \
494     B->nonzerostate++;                                    \
495     b_noinsert: ;                                         \
496     bilen[row] = nrow2;                                   \
497   }
498 
499 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
500 {
501   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
502   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
503   PetscErrorCode ierr;
504   PetscInt       l,*garray = mat->garray,diag;
505 
506   PetscFunctionBegin;
507   /* code only works for square matrices A */
508 
509   /* find size of row to the left of the diagonal part */
510   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
511   row  = row - diag;
512   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
513     if (garray[b->j[b->i[row]+l]] > diag) break;
514   }
515   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
516 
517   /* diagonal part */
518   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
519 
520   /* right of diagonal part */
521   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
522   PetscFunctionReturn(0);
523 }
524 
525 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
526 {
527   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
528   PetscScalar    value;
529   PetscErrorCode ierr;
530   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
531   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
532   PetscBool      roworiented = aij->roworiented;
533 
534   /* Some Variables required in the macro */
535   Mat        A                 = aij->A;
536   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
537   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
538   MatScalar  *aa               = a->a;
539   PetscBool  ignorezeroentries = a->ignorezeroentries;
540   Mat        B                 = aij->B;
541   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
542   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
543   MatScalar  *ba               = b->a;
544 
545   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
546   PetscInt  nonew;
547   MatScalar *ap1,*ap2;
548 
549   PetscFunctionBegin;
550   for (i=0; i<m; i++) {
551     if (im[i] < 0) continue;
552 #if defined(PETSC_USE_DEBUG)
553     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
554 #endif
555     if (im[i] >= rstart && im[i] < rend) {
556       row      = im[i] - rstart;
557       lastcol1 = -1;
558       rp1      = aj + ai[row];
559       ap1      = aa + ai[row];
560       rmax1    = aimax[row];
561       nrow1    = ailen[row];
562       low1     = 0;
563       high1    = nrow1;
564       lastcol2 = -1;
565       rp2      = bj + bi[row];
566       ap2      = ba + bi[row];
567       rmax2    = bimax[row];
568       nrow2    = bilen[row];
569       low2     = 0;
570       high2    = nrow2;
571 
572       for (j=0; j<n; j++) {
573         if (roworiented) value = v[i*n+j];
574         else             value = v[i+j*m];
575         if (in[j] >= cstart && in[j] < cend) {
576           col   = in[j] - cstart;
577           nonew = a->nonew;
578           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
579           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
580         } else if (in[j] < 0) continue;
581 #if defined(PETSC_USE_DEBUG)
582         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
583 #endif
584         else {
585           if (mat->was_assembled) {
586             if (!aij->colmap) {
587               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
588             }
589 #if defined(PETSC_USE_CTABLE)
590             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
591             col--;
592 #else
593             col = aij->colmap[in[j]] - 1;
594 #endif
595             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
596               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
597               col  =  in[j];
598               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
599               B     = aij->B;
600               b     = (Mat_SeqAIJ*)B->data;
601               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
602               rp2   = bj + bi[row];
603               ap2   = ba + bi[row];
604               rmax2 = bimax[row];
605               nrow2 = bilen[row];
606               low2  = 0;
607               high2 = nrow2;
608               bm    = aij->B->rmap->n;
609               ba    = b->a;
610             } else if (col < 0) {
611               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
612                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
613               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
614             }
615           } else col = in[j];
616           nonew = b->nonew;
617           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
618         }
619       }
620     } else {
621       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
622       if (!aij->donotstash) {
623         mat->assembled = PETSC_FALSE;
624         if (roworiented) {
625           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
626         } else {
627           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
628         }
629       }
630     }
631   }
632   PetscFunctionReturn(0);
633 }
634 
635 /*
636     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
637     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
638     No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
639 */
640 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
641 {
642   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
643   Mat            A           = aij->A; /* diagonal part of the matrix */
644   Mat            B           = aij->B; /* offdiagonal part of the matrix */
645   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
646   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
647   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
648   PetscInt       *ailen      = a->ilen,*aj = a->j;
649   PetscInt       *bilen      = b->ilen,*bj = b->j;
650   PetscInt       am          = aij->A->rmap->n,j;
651   PetscInt       diag_so_far = 0,dnz;
652   PetscInt       offd_so_far = 0,onz;
653 
654   PetscFunctionBegin;
655   /* Iterate over all rows of the matrix */
656   for (j=0; j<am; j++) {
657     dnz = onz = 0;
658     /*  Iterate over all non-zero columns of the current row */
659     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
660       /* If column is in the diagonal */
661       if (mat_j[col] >= cstart && mat_j[col] < cend) {
662         aj[diag_so_far++] = mat_j[col] - cstart;
663         dnz++;
664       } else { /* off-diagonal entries */
665         bj[offd_so_far++] = mat_j[col];
666         onz++;
667       }
668     }
669     ailen[j] = dnz;
670     bilen[j] = onz;
671   }
672   PetscFunctionReturn(0);
673 }
674 
675 /*
676     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
677     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
678     No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
679     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
680     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
681 */
682 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
683 {
684   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
685   Mat            A      = aij->A; /* diagonal part of the matrix */
686   Mat            B      = aij->B; /* offdiagonal part of the matrix */
687   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
688   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
689   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
690   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
691   PetscInt       *ailen = a->ilen,*aj = a->j;
692   PetscInt       *bilen = b->ilen,*bj = b->j;
693   PetscInt       am     = aij->A->rmap->n,j;
694   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
695   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
696   PetscScalar    *aa = a->a,*ba = b->a;
697 
698   PetscFunctionBegin;
699   /* Iterate over all rows of the matrix */
700   for (j=0; j<am; j++) {
701     dnz_row = onz_row = 0;
702     rowstart_offd = full_offd_i[j];
703     rowstart_diag = full_diag_i[j];
704     /*  Iterate over all non-zero columns of the current row */
705     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
706       /* If column is in the diagonal */
707       if (mat_j[col] >= cstart && mat_j[col] < cend) {
708         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
709         aa[rowstart_diag+dnz_row] = mat_a[col];
710         dnz_row++;
711       } else { /* off-diagonal entries */
712         bj[rowstart_offd+onz_row] = mat_j[col];
713         ba[rowstart_offd+onz_row] = mat_a[col];
714         onz_row++;
715       }
716     }
717     ailen[j] = dnz_row;
718     bilen[j] = onz_row;
719   }
720   PetscFunctionReturn(0);
721 }
722 
723 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
724 {
725   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
726   PetscErrorCode ierr;
727   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
728   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
729 
730   PetscFunctionBegin;
731   for (i=0; i<m; i++) {
732     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
733     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
734     if (idxm[i] >= rstart && idxm[i] < rend) {
735       row = idxm[i] - rstart;
736       for (j=0; j<n; j++) {
737         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
738         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
739         if (idxn[j] >= cstart && idxn[j] < cend) {
740           col  = idxn[j] - cstart;
741           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
742         } else {
743           if (!aij->colmap) {
744             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
745           }
746 #if defined(PETSC_USE_CTABLE)
747           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
748           col--;
749 #else
750           col = aij->colmap[idxn[j]] - 1;
751 #endif
752           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
753           else {
754             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
755           }
756         }
757       }
758     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
759   }
760   PetscFunctionReturn(0);
761 }
762 
763 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
764 
765 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
766 {
767   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
768   PetscErrorCode ierr;
769   PetscInt       nstash,reallocs;
770 
771   PetscFunctionBegin;
772   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
773 
774   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
775   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
776   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
777   PetscFunctionReturn(0);
778 }
779 
780 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
781 {
782   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
783   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
784   PetscErrorCode ierr;
785   PetscMPIInt    n;
786   PetscInt       i,j,rstart,ncols,flg;
787   PetscInt       *row,*col;
788   PetscBool      other_disassembled;
789   PetscScalar    *val;
790 
791   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
792 
793   PetscFunctionBegin;
794   if (!aij->donotstash && !mat->nooffprocentries) {
795     while (1) {
796       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
797       if (!flg) break;
798 
799       for (i=0; i<n; ) {
800         /* Now identify the consecutive vals belonging to the same row */
801         for (j=i,rstart=row[j]; j<n; j++) {
802           if (row[j] != rstart) break;
803         }
804         if (j < n) ncols = j-i;
805         else       ncols = n-i;
806         /* Now assemble all these values with a single function call */
807         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
808 
809         i = j;
810       }
811     }
812     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
813   }
814   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
815   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
816 
817   /* determine if any processor has disassembled, if so we must
818      also disassemble ourselfs, in order that we may reassemble. */
819   /*
820      if nonzero structure of submatrix B cannot change then we know that
821      no processor disassembled thus we can skip this stuff
822   */
823   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
824     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
825     if (mat->was_assembled && !other_disassembled) {
826       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
827     }
828   }
829   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
830     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
831   }
832   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
833   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
834   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
835 
836   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
837 
838   aij->rowvalues = 0;
839 
840   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
841   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
842 
843   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
844   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
845     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
846     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
847   }
848   PetscFunctionReturn(0);
849 }
850 
851 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
852 {
853   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
854   PetscErrorCode ierr;
855 
856   PetscFunctionBegin;
857   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
858   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
859   PetscFunctionReturn(0);
860 }
861 
862 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
863 {
864   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
865   PetscInt      *lrows;
866   PetscInt       r, len;
867   PetscBool      cong;
868   PetscErrorCode ierr;
869 
870   PetscFunctionBegin;
871   /* get locally owned rows */
872   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
873   /* fix right hand side if needed */
874   if (x && b) {
875     const PetscScalar *xx;
876     PetscScalar       *bb;
877 
878     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
879     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
880     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
881     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
882     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
883   }
884   /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/
885   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
886   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
887   if ((diag != 0.0) && cong) {
888     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
889   } else if (diag != 0.0) {
890     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
891     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
892     for (r = 0; r < len; ++r) {
893       const PetscInt row = lrows[r] + A->rmap->rstart;
894       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
895     }
896     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
897     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
898   } else {
899     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
900   }
901   ierr = PetscFree(lrows);CHKERRQ(ierr);
902 
903   /* only change matrix nonzero state if pattern was allowed to be changed */
904   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
905     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
906     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
907   }
908   PetscFunctionReturn(0);
909 }
910 
911 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
912 {
913   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
914   PetscErrorCode    ierr;
915   PetscMPIInt       n = A->rmap->n;
916   PetscInt          i,j,r,m,p = 0,len = 0;
917   PetscInt          *lrows,*owners = A->rmap->range;
918   PetscSFNode       *rrows;
919   PetscSF           sf;
920   const PetscScalar *xx;
921   PetscScalar       *bb,*mask;
922   Vec               xmask,lmask;
923   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
924   const PetscInt    *aj, *ii,*ridx;
925   PetscScalar       *aa;
926 
927   PetscFunctionBegin;
928   /* Create SF where leaves are input rows and roots are owned rows */
929   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
930   for (r = 0; r < n; ++r) lrows[r] = -1;
931   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
932   for (r = 0; r < N; ++r) {
933     const PetscInt idx   = rows[r];
934     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
935     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
936       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
937     }
938     rrows[r].rank  = p;
939     rrows[r].index = rows[r] - owners[p];
940   }
941   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
942   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
943   /* Collect flags for rows to be zeroed */
944   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
945   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
946   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
947   /* Compress and put in row numbers */
948   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
949   /* zero diagonal part of matrix */
950   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
951   /* handle off diagonal part of matrix */
952   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
953   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
954   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
955   for (i=0; i<len; i++) bb[lrows[i]] = 1;
956   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
957   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
958   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
959   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
960   if (x) {
961     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
962     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
963     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
964     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
965   }
966   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
967   /* remove zeroed rows of off diagonal matrix */
968   ii = aij->i;
969   for (i=0; i<len; i++) {
970     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
971   }
972   /* loop over all elements of off process part of matrix zeroing removed columns*/
973   if (aij->compressedrow.use) {
974     m    = aij->compressedrow.nrows;
975     ii   = aij->compressedrow.i;
976     ridx = aij->compressedrow.rindex;
977     for (i=0; i<m; i++) {
978       n  = ii[i+1] - ii[i];
979       aj = aij->j + ii[i];
980       aa = aij->a + ii[i];
981 
982       for (j=0; j<n; j++) {
983         if (PetscAbsScalar(mask[*aj])) {
984           if (b) bb[*ridx] -= *aa*xx[*aj];
985           *aa = 0.0;
986         }
987         aa++;
988         aj++;
989       }
990       ridx++;
991     }
992   } else { /* do not use compressed row format */
993     m = l->B->rmap->n;
994     for (i=0; i<m; i++) {
995       n  = ii[i+1] - ii[i];
996       aj = aij->j + ii[i];
997       aa = aij->a + ii[i];
998       for (j=0; j<n; j++) {
999         if (PetscAbsScalar(mask[*aj])) {
1000           if (b) bb[i] -= *aa*xx[*aj];
1001           *aa = 0.0;
1002         }
1003         aa++;
1004         aj++;
1005       }
1006     }
1007   }
1008   if (x) {
1009     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1010     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1011   }
1012   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1013   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1014   ierr = PetscFree(lrows);CHKERRQ(ierr);
1015 
1016   /* only change matrix nonzero state if pattern was allowed to be changed */
1017   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1018     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1019     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1020   }
1021   PetscFunctionReturn(0);
1022 }
1023 
1024 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1025 {
1026   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1027   PetscErrorCode ierr;
1028   PetscInt       nt;
1029   VecScatter     Mvctx = a->Mvctx;
1030 
1031   PetscFunctionBegin;
1032   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1033   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1034 
1035   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1036   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1037   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1038   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1039   PetscFunctionReturn(0);
1040 }
1041 
1042 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1043 {
1044   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1045   PetscErrorCode ierr;
1046 
1047   PetscFunctionBegin;
1048   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1049   PetscFunctionReturn(0);
1050 }
1051 
1052 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1053 {
1054   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1055   PetscErrorCode ierr;
1056   VecScatter     Mvctx = a->Mvctx;
1057 
1058   PetscFunctionBegin;
1059   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1060   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1061   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1062   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1063   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1064   PetscFunctionReturn(0);
1065 }
1066 
1067 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1068 {
1069   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1070   PetscErrorCode ierr;
1071   PetscBool      merged;
1072 
1073   PetscFunctionBegin;
1074   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1075   /* do nondiagonal part */
1076   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1077   if (!merged) {
1078     /* send it on its way */
1079     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1080     /* do local part */
1081     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1082     /* receive remote parts: note this assumes the values are not actually */
1083     /* added in yy until the next line, */
1084     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1085   } else {
1086     /* do local part */
1087     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1088     /* send it on its way */
1089     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1090     /* values actually were received in the Begin() but we need to call this nop */
1091     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1092   }
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1097 {
1098   MPI_Comm       comm;
1099   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1100   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1101   IS             Me,Notme;
1102   PetscErrorCode ierr;
1103   PetscInt       M,N,first,last,*notme,i;
1104   PetscMPIInt    size;
1105 
1106   PetscFunctionBegin;
1107   /* Easy test: symmetric diagonal block */
1108   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1109   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1110   if (!*f) PetscFunctionReturn(0);
1111   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1112   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1113   if (size == 1) PetscFunctionReturn(0);
1114 
1115   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1116   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1117   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1118   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1119   for (i=0; i<first; i++) notme[i] = i;
1120   for (i=last; i<M; i++) notme[i-last+first] = i;
1121   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1122   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1123   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1124   Aoff = Aoffs[0];
1125   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1126   Boff = Boffs[0];
1127   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1128   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1129   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1130   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1131   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1132   ierr = PetscFree(notme);CHKERRQ(ierr);
1133   PetscFunctionReturn(0);
1134 }
1135 
1136 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1137 {
1138   PetscErrorCode ierr;
1139 
1140   PetscFunctionBegin;
1141   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1146 {
1147   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1148   PetscErrorCode ierr;
1149 
1150   PetscFunctionBegin;
1151   /* do nondiagonal part */
1152   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1153   /* send it on its way */
1154   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1155   /* do local part */
1156   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1157   /* receive remote parts */
1158   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1159   PetscFunctionReturn(0);
1160 }
1161 
1162 /*
1163   This only works correctly for square matrices where the subblock A->A is the
1164    diagonal block
1165 */
1166 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1167 {
1168   PetscErrorCode ierr;
1169   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1170 
1171   PetscFunctionBegin;
1172   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1173   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1174   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1175   PetscFunctionReturn(0);
1176 }
1177 
1178 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1179 {
1180   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1181   PetscErrorCode ierr;
1182 
1183   PetscFunctionBegin;
1184   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1185   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1186   PetscFunctionReturn(0);
1187 }
1188 
1189 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1190 {
1191   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1192   PetscErrorCode ierr;
1193 
1194   PetscFunctionBegin;
1195 #if defined(PETSC_USE_LOG)
1196   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1197 #endif
1198   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1199   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1200   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1201   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1202 #if defined(PETSC_USE_CTABLE)
1203   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1204 #else
1205   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1206 #endif
1207   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1208   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1209   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1210   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1211   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1212   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1213   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1214 
1215   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1216   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1217   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1218   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1219   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1220   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1221   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1222   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1223   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1224 #if defined(PETSC_HAVE_ELEMENTAL)
1225   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1226 #endif
1227 #if defined(PETSC_HAVE_HYPRE)
1228   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1229   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1230 #endif
1231   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1232   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
1233   PetscFunctionReturn(0);
1234 }
1235 
1236 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1237 {
1238   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1239   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1240   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1241   PetscErrorCode ierr;
1242   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1243   int            fd;
1244   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1245   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1246   PetscScalar    *column_values;
1247   PetscInt       message_count,flowcontrolcount;
1248   FILE           *file;
1249 
1250   PetscFunctionBegin;
1251   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1252   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1253   nz   = A->nz + B->nz;
1254   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1255   if (!rank) {
1256     header[0] = MAT_FILE_CLASSID;
1257     header[1] = mat->rmap->N;
1258     header[2] = mat->cmap->N;
1259 
1260     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1261     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1262     /* get largest number of rows any processor has */
1263     rlen  = mat->rmap->n;
1264     range = mat->rmap->range;
1265     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1266   } else {
1267     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1268     rlen = mat->rmap->n;
1269   }
1270 
1271   /* load up the local row counts */
1272   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1273   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1274 
1275   /* store the row lengths to the file */
1276   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1277   if (!rank) {
1278     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1279     for (i=1; i<size; i++) {
1280       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1281       rlen = range[i+1] - range[i];
1282       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1283       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1284     }
1285     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1286   } else {
1287     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1288     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1289     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1290   }
1291   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1292 
1293   /* load up the local column indices */
1294   nzmax = nz; /* th processor needs space a largest processor needs */
1295   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1296   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1297   cnt   = 0;
1298   for (i=0; i<mat->rmap->n; i++) {
1299     for (j=B->i[i]; j<B->i[i+1]; j++) {
1300       if ((col = garray[B->j[j]]) > cstart) break;
1301       column_indices[cnt++] = col;
1302     }
1303     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1304     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1305   }
1306   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1307 
1308   /* store the column indices to the file */
1309   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1310   if (!rank) {
1311     MPI_Status status;
1312     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1313     for (i=1; i<size; i++) {
1314       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1315       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1316       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1317       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1318       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1319     }
1320     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1321   } else {
1322     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1323     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1324     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1325     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1326   }
1327   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1328 
1329   /* load up the local column values */
1330   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1331   cnt  = 0;
1332   for (i=0; i<mat->rmap->n; i++) {
1333     for (j=B->i[i]; j<B->i[i+1]; j++) {
1334       if (garray[B->j[j]] > cstart) break;
1335       column_values[cnt++] = B->a[j];
1336     }
1337     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1338     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1339   }
1340   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1341 
1342   /* store the column values to the file */
1343   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1344   if (!rank) {
1345     MPI_Status status;
1346     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1347     for (i=1; i<size; i++) {
1348       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1349       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1350       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1351       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1352       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1353     }
1354     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1355   } else {
1356     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1357     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1358     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1359     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1360   }
1361   ierr = PetscFree(column_values);CHKERRQ(ierr);
1362 
1363   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1364   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1365   PetscFunctionReturn(0);
1366 }
1367 
1368 #include <petscdraw.h>
1369 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1370 {
1371   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1372   PetscErrorCode    ierr;
1373   PetscMPIInt       rank = aij->rank,size = aij->size;
1374   PetscBool         isdraw,iascii,isbinary;
1375   PetscViewer       sviewer;
1376   PetscViewerFormat format;
1377 
1378   PetscFunctionBegin;
1379   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1380   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1381   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1382   if (iascii) {
1383     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1384     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1385       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1386       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1387       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1388       for (i=0; i<(PetscInt)size; i++) {
1389         nmax = PetscMax(nmax,nz[i]);
1390         nmin = PetscMin(nmin,nz[i]);
1391         navg += nz[i];
1392       }
1393       ierr = PetscFree(nz);CHKERRQ(ierr);
1394       navg = navg/size;
1395       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1396       PetscFunctionReturn(0);
1397     }
1398     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1399     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1400       MatInfo   info;
1401       PetscBool inodes;
1402 
1403       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1404       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1405       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1406       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1407       if (!inodes) {
1408         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1409                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1410       } else {
1411         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1412                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1413       }
1414       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1415       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1416       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1417       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1418       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1419       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1420       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1421       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1422       PetscFunctionReturn(0);
1423     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1424       PetscInt inodecount,inodelimit,*inodes;
1425       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1426       if (inodes) {
1427         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1428       } else {
1429         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1430       }
1431       PetscFunctionReturn(0);
1432     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1433       PetscFunctionReturn(0);
1434     }
1435   } else if (isbinary) {
1436     if (size == 1) {
1437       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1438       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1439     } else {
1440       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1441     }
1442     PetscFunctionReturn(0);
1443   } else if (isdraw) {
1444     PetscDraw draw;
1445     PetscBool isnull;
1446     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1447     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1448     if (isnull) PetscFunctionReturn(0);
1449   }
1450 
1451   {
1452     /* assemble the entire matrix onto first processor. */
1453     Mat        A;
1454     Mat_SeqAIJ *Aloc;
1455     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1456     MatScalar  *a;
1457 
1458     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1459     if (!rank) {
1460       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1461     } else {
1462       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1463     }
1464     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1465     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1466     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1467     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1468     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1469 
1470     /* copy over the A part */
1471     Aloc = (Mat_SeqAIJ*)aij->A->data;
1472     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1473     row  = mat->rmap->rstart;
1474     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1475     for (i=0; i<m; i++) {
1476       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1477       row++;
1478       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1479     }
1480     aj = Aloc->j;
1481     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1482 
1483     /* copy over the B part */
1484     Aloc = (Mat_SeqAIJ*)aij->B->data;
1485     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1486     row  = mat->rmap->rstart;
1487     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1488     ct   = cols;
1489     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1490     for (i=0; i<m; i++) {
1491       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1492       row++;
1493       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1494     }
1495     ierr = PetscFree(ct);CHKERRQ(ierr);
1496     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1497     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1498     /*
1499        Everyone has to call to draw the matrix since the graphics waits are
1500        synchronized across all processors that share the PetscDraw object
1501     */
1502     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1503     if (!rank) {
1504       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1505       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1506     }
1507     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1508     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1509     ierr = MatDestroy(&A);CHKERRQ(ierr);
1510   }
1511   PetscFunctionReturn(0);
1512 }
1513 
1514 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1515 {
1516   PetscErrorCode ierr;
1517   PetscBool      iascii,isdraw,issocket,isbinary;
1518 
1519   PetscFunctionBegin;
1520   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1521   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1522   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1523   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1524   if (iascii || isdraw || isbinary || issocket) {
1525     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1526   }
1527   PetscFunctionReturn(0);
1528 }
1529 
1530 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1531 {
1532   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1533   PetscErrorCode ierr;
1534   Vec            bb1 = 0;
1535   PetscBool      hasop;
1536 
1537   PetscFunctionBegin;
1538   if (flag == SOR_APPLY_UPPER) {
1539     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1540     PetscFunctionReturn(0);
1541   }
1542 
1543   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1544     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1545   }
1546 
1547   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1548     if (flag & SOR_ZERO_INITIAL_GUESS) {
1549       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1550       its--;
1551     }
1552 
1553     while (its--) {
1554       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1555       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1556 
1557       /* update rhs: bb1 = bb - B*x */
1558       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1559       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1560 
1561       /* local sweep */
1562       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1563     }
1564   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1565     if (flag & SOR_ZERO_INITIAL_GUESS) {
1566       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1567       its--;
1568     }
1569     while (its--) {
1570       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1571       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1572 
1573       /* update rhs: bb1 = bb - B*x */
1574       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1575       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1576 
1577       /* local sweep */
1578       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1579     }
1580   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1581     if (flag & SOR_ZERO_INITIAL_GUESS) {
1582       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1583       its--;
1584     }
1585     while (its--) {
1586       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1587       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1588 
1589       /* update rhs: bb1 = bb - B*x */
1590       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1591       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1592 
1593       /* local sweep */
1594       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1595     }
1596   } else if (flag & SOR_EISENSTAT) {
1597     Vec xx1;
1598 
1599     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1600     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1601 
1602     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1603     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1604     if (!mat->diag) {
1605       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1606       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1607     }
1608     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1609     if (hasop) {
1610       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1611     } else {
1612       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1613     }
1614     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1615 
1616     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1617 
1618     /* local sweep */
1619     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1620     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1621     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1622   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1623 
1624   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1625 
1626   matin->factorerrortype = mat->A->factorerrortype;
1627   PetscFunctionReturn(0);
1628 }
1629 
1630 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1631 {
1632   Mat            aA,aB,Aperm;
1633   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1634   PetscScalar    *aa,*ba;
1635   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1636   PetscSF        rowsf,sf;
1637   IS             parcolp = NULL;
1638   PetscBool      done;
1639   PetscErrorCode ierr;
1640 
1641   PetscFunctionBegin;
1642   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1643   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1644   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1645   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1646 
1647   /* Invert row permutation to find out where my rows should go */
1648   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1649   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1650   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1651   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1652   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1653   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1654 
1655   /* Invert column permutation to find out where my columns should go */
1656   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1657   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1658   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1659   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1660   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1661   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1662   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1663 
1664   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1665   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1666   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1667 
1668   /* Find out where my gcols should go */
1669   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1670   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1671   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1672   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1673   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1674   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1675   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1676   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1677 
1678   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1679   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1680   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1681   for (i=0; i<m; i++) {
1682     PetscInt row = rdest[i],rowner;
1683     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1684     for (j=ai[i]; j<ai[i+1]; j++) {
1685       PetscInt cowner,col = cdest[aj[j]];
1686       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1687       if (rowner == cowner) dnnz[i]++;
1688       else onnz[i]++;
1689     }
1690     for (j=bi[i]; j<bi[i+1]; j++) {
1691       PetscInt cowner,col = gcdest[bj[j]];
1692       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1693       if (rowner == cowner) dnnz[i]++;
1694       else onnz[i]++;
1695     }
1696   }
1697   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1698   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1699   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1700   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1701   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1702 
1703   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1704   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1705   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1706   for (i=0; i<m; i++) {
1707     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1708     PetscInt j0,rowlen;
1709     rowlen = ai[i+1] - ai[i];
1710     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1711       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1712       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1713     }
1714     rowlen = bi[i+1] - bi[i];
1715     for (j0=j=0; j<rowlen; j0=j) {
1716       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1717       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1718     }
1719   }
1720   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1721   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1722   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1723   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1724   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1725   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1726   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1727   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1728   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1729   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1730   *B = Aperm;
1731   PetscFunctionReturn(0);
1732 }
1733 
1734 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1735 {
1736   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1737   PetscErrorCode ierr;
1738 
1739   PetscFunctionBegin;
1740   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1741   if (ghosts) *ghosts = aij->garray;
1742   PetscFunctionReturn(0);
1743 }
1744 
1745 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1746 {
1747   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1748   Mat            A    = mat->A,B = mat->B;
1749   PetscErrorCode ierr;
1750   PetscReal      isend[5],irecv[5];
1751 
1752   PetscFunctionBegin;
1753   info->block_size = 1.0;
1754   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1755 
1756   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1757   isend[3] = info->memory;  isend[4] = info->mallocs;
1758 
1759   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1760 
1761   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1762   isend[3] += info->memory;  isend[4] += info->mallocs;
1763   if (flag == MAT_LOCAL) {
1764     info->nz_used      = isend[0];
1765     info->nz_allocated = isend[1];
1766     info->nz_unneeded  = isend[2];
1767     info->memory       = isend[3];
1768     info->mallocs      = isend[4];
1769   } else if (flag == MAT_GLOBAL_MAX) {
1770     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1771 
1772     info->nz_used      = irecv[0];
1773     info->nz_allocated = irecv[1];
1774     info->nz_unneeded  = irecv[2];
1775     info->memory       = irecv[3];
1776     info->mallocs      = irecv[4];
1777   } else if (flag == MAT_GLOBAL_SUM) {
1778     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1779 
1780     info->nz_used      = irecv[0];
1781     info->nz_allocated = irecv[1];
1782     info->nz_unneeded  = irecv[2];
1783     info->memory       = irecv[3];
1784     info->mallocs      = irecv[4];
1785   }
1786   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1787   info->fill_ratio_needed = 0;
1788   info->factor_mallocs    = 0;
1789   PetscFunctionReturn(0);
1790 }
1791 
1792 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1793 {
1794   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1795   PetscErrorCode ierr;
1796 
1797   PetscFunctionBegin;
1798   switch (op) {
1799   case MAT_NEW_NONZERO_LOCATIONS:
1800   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1801   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1802   case MAT_KEEP_NONZERO_PATTERN:
1803   case MAT_NEW_NONZERO_LOCATION_ERR:
1804   case MAT_USE_INODES:
1805   case MAT_IGNORE_ZERO_ENTRIES:
1806     MatCheckPreallocated(A,1);
1807     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1808     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1809     break;
1810   case MAT_ROW_ORIENTED:
1811     MatCheckPreallocated(A,1);
1812     a->roworiented = flg;
1813 
1814     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1815     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1816     break;
1817   case MAT_NEW_DIAGONALS:
1818     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1819     break;
1820   case MAT_IGNORE_OFF_PROC_ENTRIES:
1821     a->donotstash = flg;
1822     break;
1823   case MAT_SPD:
1824     A->spd_set = PETSC_TRUE;
1825     A->spd     = flg;
1826     if (flg) {
1827       A->symmetric                  = PETSC_TRUE;
1828       A->structurally_symmetric     = PETSC_TRUE;
1829       A->symmetric_set              = PETSC_TRUE;
1830       A->structurally_symmetric_set = PETSC_TRUE;
1831     }
1832     break;
1833   case MAT_SYMMETRIC:
1834     MatCheckPreallocated(A,1);
1835     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1836     break;
1837   case MAT_STRUCTURALLY_SYMMETRIC:
1838     MatCheckPreallocated(A,1);
1839     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1840     break;
1841   case MAT_HERMITIAN:
1842     MatCheckPreallocated(A,1);
1843     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1844     break;
1845   case MAT_SYMMETRY_ETERNAL:
1846     MatCheckPreallocated(A,1);
1847     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1848     break;
1849   case MAT_SUBMAT_SINGLEIS:
1850     A->submat_singleis = flg;
1851     break;
1852   case MAT_STRUCTURE_ONLY:
1853     /* The option is handled directly by MatSetOption() */
1854     break;
1855   default:
1856     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1857   }
1858   PetscFunctionReturn(0);
1859 }
1860 
1861 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1862 {
1863   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1864   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1865   PetscErrorCode ierr;
1866   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1867   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1868   PetscInt       *cmap,*idx_p;
1869 
1870   PetscFunctionBegin;
1871   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1872   mat->getrowactive = PETSC_TRUE;
1873 
1874   if (!mat->rowvalues && (idx || v)) {
1875     /*
1876         allocate enough space to hold information from the longest row.
1877     */
1878     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1879     PetscInt   max = 1,tmp;
1880     for (i=0; i<matin->rmap->n; i++) {
1881       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1882       if (max < tmp) max = tmp;
1883     }
1884     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1885   }
1886 
1887   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1888   lrow = row - rstart;
1889 
1890   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1891   if (!v)   {pvA = 0; pvB = 0;}
1892   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1893   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1894   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1895   nztot = nzA + nzB;
1896 
1897   cmap = mat->garray;
1898   if (v  || idx) {
1899     if (nztot) {
1900       /* Sort by increasing column numbers, assuming A and B already sorted */
1901       PetscInt imark = -1;
1902       if (v) {
1903         *v = v_p = mat->rowvalues;
1904         for (i=0; i<nzB; i++) {
1905           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1906           else break;
1907         }
1908         imark = i;
1909         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1910         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1911       }
1912       if (idx) {
1913         *idx = idx_p = mat->rowindices;
1914         if (imark > -1) {
1915           for (i=0; i<imark; i++) {
1916             idx_p[i] = cmap[cworkB[i]];
1917           }
1918         } else {
1919           for (i=0; i<nzB; i++) {
1920             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1921             else break;
1922           }
1923           imark = i;
1924         }
1925         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1926         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1927       }
1928     } else {
1929       if (idx) *idx = 0;
1930       if (v)   *v   = 0;
1931     }
1932   }
1933   *nz  = nztot;
1934   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1935   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1936   PetscFunctionReturn(0);
1937 }
1938 
1939 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1940 {
1941   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1942 
1943   PetscFunctionBegin;
1944   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1945   aij->getrowactive = PETSC_FALSE;
1946   PetscFunctionReturn(0);
1947 }
1948 
1949 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1950 {
1951   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1952   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1953   PetscErrorCode ierr;
1954   PetscInt       i,j,cstart = mat->cmap->rstart;
1955   PetscReal      sum = 0.0;
1956   MatScalar      *v;
1957 
1958   PetscFunctionBegin;
1959   if (aij->size == 1) {
1960     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1961   } else {
1962     if (type == NORM_FROBENIUS) {
1963       v = amat->a;
1964       for (i=0; i<amat->nz; i++) {
1965         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1966       }
1967       v = bmat->a;
1968       for (i=0; i<bmat->nz; i++) {
1969         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1970       }
1971       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1972       *norm = PetscSqrtReal(*norm);
1973       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1974     } else if (type == NORM_1) { /* max column norm */
1975       PetscReal *tmp,*tmp2;
1976       PetscInt  *jj,*garray = aij->garray;
1977       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1978       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1979       *norm = 0.0;
1980       v     = amat->a; jj = amat->j;
1981       for (j=0; j<amat->nz; j++) {
1982         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1983       }
1984       v = bmat->a; jj = bmat->j;
1985       for (j=0; j<bmat->nz; j++) {
1986         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1987       }
1988       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1989       for (j=0; j<mat->cmap->N; j++) {
1990         if (tmp2[j] > *norm) *norm = tmp2[j];
1991       }
1992       ierr = PetscFree(tmp);CHKERRQ(ierr);
1993       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1994       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1995     } else if (type == NORM_INFINITY) { /* max row norm */
1996       PetscReal ntemp = 0.0;
1997       for (j=0; j<aij->A->rmap->n; j++) {
1998         v   = amat->a + amat->i[j];
1999         sum = 0.0;
2000         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2001           sum += PetscAbsScalar(*v); v++;
2002         }
2003         v = bmat->a + bmat->i[j];
2004         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2005           sum += PetscAbsScalar(*v); v++;
2006         }
2007         if (sum > ntemp) ntemp = sum;
2008       }
2009       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2010       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
2011     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
2012   }
2013   PetscFunctionReturn(0);
2014 }
2015 
2016 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2017 {
2018   Mat_MPIAIJ     *a    =(Mat_MPIAIJ*)A->data,*b;
2019   Mat_SeqAIJ     *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
2020   PetscInt       M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,*B_diag_ilen,*B_diag_i,i,ncol,A_diag_ncol;
2021   PetscErrorCode ierr;
2022   Mat            B,A_diag,*B_diag;
2023   MatScalar      *array;
2024 
2025   PetscFunctionBegin;
2026   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2027   ai = Aloc->i; aj = Aloc->j;
2028   bi = Bloc->i; bj = Bloc->j;
2029   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2030     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2031     PetscSFNode          *oloc;
2032     PETSC_UNUSED PetscSF sf;
2033 
2034     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2035     /* compute d_nnz for preallocation */
2036     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2037     for (i=0; i<ai[ma]; i++) {
2038       d_nnz[aj[i]]++;
2039     }
2040     /* compute local off-diagonal contributions */
2041     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2042     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2043     /* map those to global */
2044     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2045     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2046     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2047     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2048     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2049     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2050     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2051 
2052     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2053     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2054     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2055     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2056     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2057     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2058   } else {
2059     B    = *matout;
2060     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2061   }
2062 
2063   b           = (Mat_MPIAIJ*)B->data;
2064   A_diag      = a->A;
2065   B_diag      = &b->A;
2066   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2067   A_diag_ncol = A_diag->cmap->N;
2068   B_diag_ilen = sub_B_diag->ilen;
2069   B_diag_i    = sub_B_diag->i;
2070 
2071   /* Set ilen for diagonal of B */
2072   for (i=0; i<A_diag_ncol; i++) {
2073     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2074   }
2075 
2076   /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
2077   very quickly (=without using MatSetValues), because all writes are local. */
2078   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2079 
2080   /* copy over the B part */
2081   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2082   array = Bloc->a;
2083   row   = A->rmap->rstart;
2084   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2085   cols_tmp = cols;
2086   for (i=0; i<mb; i++) {
2087     ncol = bi[i+1]-bi[i];
2088     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2089     row++;
2090     array += ncol; cols_tmp += ncol;
2091   }
2092   ierr = PetscFree(cols);CHKERRQ(ierr);
2093 
2094   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2095   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2096   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2097     *matout = B;
2098   } else {
2099     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2100   }
2101   PetscFunctionReturn(0);
2102 }
2103 
2104 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2105 {
2106   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2107   Mat            a    = aij->A,b = aij->B;
2108   PetscErrorCode ierr;
2109   PetscInt       s1,s2,s3;
2110 
2111   PetscFunctionBegin;
2112   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2113   if (rr) {
2114     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2115     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2116     /* Overlap communication with computation. */
2117     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2118   }
2119   if (ll) {
2120     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2121     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2122     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2123   }
2124   /* scale  the diagonal block */
2125   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2126 
2127   if (rr) {
2128     /* Do a scatter end and then right scale the off-diagonal block */
2129     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2130     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2131   }
2132   PetscFunctionReturn(0);
2133 }
2134 
2135 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2136 {
2137   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2138   PetscErrorCode ierr;
2139 
2140   PetscFunctionBegin;
2141   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2142   PetscFunctionReturn(0);
2143 }
2144 
2145 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2146 {
2147   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2148   Mat            a,b,c,d;
2149   PetscBool      flg;
2150   PetscErrorCode ierr;
2151 
2152   PetscFunctionBegin;
2153   a = matA->A; b = matA->B;
2154   c = matB->A; d = matB->B;
2155 
2156   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2157   if (flg) {
2158     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2159   }
2160   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2161   PetscFunctionReturn(0);
2162 }
2163 
2164 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2165 {
2166   PetscErrorCode ierr;
2167   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2168   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2169 
2170   PetscFunctionBegin;
2171   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2172   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2173     /* because of the column compression in the off-processor part of the matrix a->B,
2174        the number of columns in a->B and b->B may be different, hence we cannot call
2175        the MatCopy() directly on the two parts. If need be, we can provide a more
2176        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2177        then copying the submatrices */
2178     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2179   } else {
2180     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2181     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2182   }
2183   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2184   PetscFunctionReturn(0);
2185 }
2186 
2187 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2188 {
2189   PetscErrorCode ierr;
2190 
2191   PetscFunctionBegin;
2192   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2193   PetscFunctionReturn(0);
2194 }
2195 
2196 /*
2197    Computes the number of nonzeros per row needed for preallocation when X and Y
2198    have different nonzero structure.
2199 */
2200 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2201 {
2202   PetscInt       i,j,k,nzx,nzy;
2203 
2204   PetscFunctionBegin;
2205   /* Set the number of nonzeros in the new matrix */
2206   for (i=0; i<m; i++) {
2207     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2208     nzx = xi[i+1] - xi[i];
2209     nzy = yi[i+1] - yi[i];
2210     nnz[i] = 0;
2211     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2212       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2213       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2214       nnz[i]++;
2215     }
2216     for (; k<nzy; k++) nnz[i]++;
2217   }
2218   PetscFunctionReturn(0);
2219 }
2220 
2221 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2222 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2223 {
2224   PetscErrorCode ierr;
2225   PetscInt       m = Y->rmap->N;
2226   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2227   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2228 
2229   PetscFunctionBegin;
2230   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2231   PetscFunctionReturn(0);
2232 }
2233 
2234 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2235 {
2236   PetscErrorCode ierr;
2237   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2238   PetscBLASInt   bnz,one=1;
2239   Mat_SeqAIJ     *x,*y;
2240 
2241   PetscFunctionBegin;
2242   if (str == SAME_NONZERO_PATTERN) {
2243     PetscScalar alpha = a;
2244     x    = (Mat_SeqAIJ*)xx->A->data;
2245     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2246     y    = (Mat_SeqAIJ*)yy->A->data;
2247     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2248     x    = (Mat_SeqAIJ*)xx->B->data;
2249     y    = (Mat_SeqAIJ*)yy->B->data;
2250     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2251     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2252     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2253   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2254     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2255   } else {
2256     Mat      B;
2257     PetscInt *nnz_d,*nnz_o;
2258     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2259     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2260     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2261     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2262     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2263     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2264     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2265     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2266     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2267     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2268     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2269     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2270     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2271     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2272   }
2273   PetscFunctionReturn(0);
2274 }
2275 
2276 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2277 
2278 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2279 {
2280 #if defined(PETSC_USE_COMPLEX)
2281   PetscErrorCode ierr;
2282   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2283 
2284   PetscFunctionBegin;
2285   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2286   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2287 #else
2288   PetscFunctionBegin;
2289 #endif
2290   PetscFunctionReturn(0);
2291 }
2292 
2293 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2294 {
2295   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2296   PetscErrorCode ierr;
2297 
2298   PetscFunctionBegin;
2299   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2300   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2301   PetscFunctionReturn(0);
2302 }
2303 
2304 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2305 {
2306   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2307   PetscErrorCode ierr;
2308 
2309   PetscFunctionBegin;
2310   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2311   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2312   PetscFunctionReturn(0);
2313 }
2314 
2315 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2316 {
2317   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2318   PetscErrorCode ierr;
2319   PetscInt       i,*idxb = 0;
2320   PetscScalar    *va,*vb;
2321   Vec            vtmp;
2322 
2323   PetscFunctionBegin;
2324   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2325   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2326   if (idx) {
2327     for (i=0; i<A->rmap->n; i++) {
2328       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2329     }
2330   }
2331 
2332   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2333   if (idx) {
2334     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2335   }
2336   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2337   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2338 
2339   for (i=0; i<A->rmap->n; i++) {
2340     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2341       va[i] = vb[i];
2342       if (idx) idx[i] = a->garray[idxb[i]];
2343     }
2344   }
2345 
2346   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2347   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2348   ierr = PetscFree(idxb);CHKERRQ(ierr);
2349   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2350   PetscFunctionReturn(0);
2351 }
2352 
2353 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2354 {
2355   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2356   PetscErrorCode ierr;
2357   PetscInt       i,*idxb = 0;
2358   PetscScalar    *va,*vb;
2359   Vec            vtmp;
2360 
2361   PetscFunctionBegin;
2362   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2363   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2364   if (idx) {
2365     for (i=0; i<A->cmap->n; i++) {
2366       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2367     }
2368   }
2369 
2370   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2371   if (idx) {
2372     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2373   }
2374   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2375   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2376 
2377   for (i=0; i<A->rmap->n; i++) {
2378     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2379       va[i] = vb[i];
2380       if (idx) idx[i] = a->garray[idxb[i]];
2381     }
2382   }
2383 
2384   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2385   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2386   ierr = PetscFree(idxb);CHKERRQ(ierr);
2387   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2388   PetscFunctionReturn(0);
2389 }
2390 
2391 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2392 {
2393   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2394   PetscInt       n      = A->rmap->n;
2395   PetscInt       cstart = A->cmap->rstart;
2396   PetscInt       *cmap  = mat->garray;
2397   PetscInt       *diagIdx, *offdiagIdx;
2398   Vec            diagV, offdiagV;
2399   PetscScalar    *a, *diagA, *offdiagA;
2400   PetscInt       r;
2401   PetscErrorCode ierr;
2402 
2403   PetscFunctionBegin;
2404   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2405   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
2406   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
2407   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2408   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2409   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2410   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2411   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2412   for (r = 0; r < n; ++r) {
2413     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2414       a[r]   = diagA[r];
2415       idx[r] = cstart + diagIdx[r];
2416     } else {
2417       a[r]   = offdiagA[r];
2418       idx[r] = cmap[offdiagIdx[r]];
2419     }
2420   }
2421   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2422   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2423   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2424   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2425   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2426   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2427   PetscFunctionReturn(0);
2428 }
2429 
2430 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2431 {
2432   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2433   PetscInt       n      = A->rmap->n;
2434   PetscInt       cstart = A->cmap->rstart;
2435   PetscInt       *cmap  = mat->garray;
2436   PetscInt       *diagIdx, *offdiagIdx;
2437   Vec            diagV, offdiagV;
2438   PetscScalar    *a, *diagA, *offdiagA;
2439   PetscInt       r;
2440   PetscErrorCode ierr;
2441 
2442   PetscFunctionBegin;
2443   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2444   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2445   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2446   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2447   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2448   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2449   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2450   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2451   for (r = 0; r < n; ++r) {
2452     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2453       a[r]   = diagA[r];
2454       idx[r] = cstart + diagIdx[r];
2455     } else {
2456       a[r]   = offdiagA[r];
2457       idx[r] = cmap[offdiagIdx[r]];
2458     }
2459   }
2460   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2461   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2462   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2463   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2464   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2465   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2466   PetscFunctionReturn(0);
2467 }
2468 
2469 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2470 {
2471   PetscErrorCode ierr;
2472   Mat            *dummy;
2473 
2474   PetscFunctionBegin;
2475   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2476   *newmat = *dummy;
2477   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2478   PetscFunctionReturn(0);
2479 }
2480 
2481 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2482 {
2483   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2484   PetscErrorCode ierr;
2485 
2486   PetscFunctionBegin;
2487   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2488   A->factorerrortype = a->A->factorerrortype;
2489   PetscFunctionReturn(0);
2490 }
2491 
2492 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2493 {
2494   PetscErrorCode ierr;
2495   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2496 
2497   PetscFunctionBegin;
2498   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2499   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2500   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2501   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2502   PetscFunctionReturn(0);
2503 }
2504 
2505 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2506 {
2507   PetscFunctionBegin;
2508   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2509   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2510   PetscFunctionReturn(0);
2511 }
2512 
2513 /*@
2514    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2515 
2516    Collective on Mat
2517 
2518    Input Parameters:
2519 +    A - the matrix
2520 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2521 
2522  Level: advanced
2523 
2524 @*/
2525 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2526 {
2527   PetscErrorCode       ierr;
2528 
2529   PetscFunctionBegin;
2530   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2531   PetscFunctionReturn(0);
2532 }
2533 
2534 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2535 {
2536   PetscErrorCode       ierr;
2537   PetscBool            sc = PETSC_FALSE,flg;
2538 
2539   PetscFunctionBegin;
2540   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2541   ierr = PetscObjectOptionsBegin((PetscObject)A);CHKERRQ(ierr);
2542   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2543   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2544   if (flg) {
2545     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2546   }
2547   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2548   PetscFunctionReturn(0);
2549 }
2550 
2551 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2552 {
2553   PetscErrorCode ierr;
2554   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2555   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2556 
2557   PetscFunctionBegin;
2558   if (!Y->preallocated) {
2559     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2560   } else if (!aij->nz) {
2561     PetscInt nonew = aij->nonew;
2562     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2563     aij->nonew = nonew;
2564   }
2565   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2566   PetscFunctionReturn(0);
2567 }
2568 
2569 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2570 {
2571   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2572   PetscErrorCode ierr;
2573 
2574   PetscFunctionBegin;
2575   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2576   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2577   if (d) {
2578     PetscInt rstart;
2579     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2580     *d += rstart;
2581 
2582   }
2583   PetscFunctionReturn(0);
2584 }
2585 
2586 
2587 /* -------------------------------------------------------------------*/
2588 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2589                                        MatGetRow_MPIAIJ,
2590                                        MatRestoreRow_MPIAIJ,
2591                                        MatMult_MPIAIJ,
2592                                 /* 4*/ MatMultAdd_MPIAIJ,
2593                                        MatMultTranspose_MPIAIJ,
2594                                        MatMultTransposeAdd_MPIAIJ,
2595                                        0,
2596                                        0,
2597                                        0,
2598                                 /*10*/ 0,
2599                                        0,
2600                                        0,
2601                                        MatSOR_MPIAIJ,
2602                                        MatTranspose_MPIAIJ,
2603                                 /*15*/ MatGetInfo_MPIAIJ,
2604                                        MatEqual_MPIAIJ,
2605                                        MatGetDiagonal_MPIAIJ,
2606                                        MatDiagonalScale_MPIAIJ,
2607                                        MatNorm_MPIAIJ,
2608                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2609                                        MatAssemblyEnd_MPIAIJ,
2610                                        MatSetOption_MPIAIJ,
2611                                        MatZeroEntries_MPIAIJ,
2612                                 /*24*/ MatZeroRows_MPIAIJ,
2613                                        0,
2614                                        0,
2615                                        0,
2616                                        0,
2617                                 /*29*/ MatSetUp_MPIAIJ,
2618                                        0,
2619                                        0,
2620                                        MatGetDiagonalBlock_MPIAIJ,
2621                                        0,
2622                                 /*34*/ MatDuplicate_MPIAIJ,
2623                                        0,
2624                                        0,
2625                                        0,
2626                                        0,
2627                                 /*39*/ MatAXPY_MPIAIJ,
2628                                        MatCreateSubMatrices_MPIAIJ,
2629                                        MatIncreaseOverlap_MPIAIJ,
2630                                        MatGetValues_MPIAIJ,
2631                                        MatCopy_MPIAIJ,
2632                                 /*44*/ MatGetRowMax_MPIAIJ,
2633                                        MatScale_MPIAIJ,
2634                                        MatShift_MPIAIJ,
2635                                        MatDiagonalSet_MPIAIJ,
2636                                        MatZeroRowsColumns_MPIAIJ,
2637                                 /*49*/ MatSetRandom_MPIAIJ,
2638                                        0,
2639                                        0,
2640                                        0,
2641                                        0,
2642                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2643                                        0,
2644                                        MatSetUnfactored_MPIAIJ,
2645                                        MatPermute_MPIAIJ,
2646                                        0,
2647                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2648                                        MatDestroy_MPIAIJ,
2649                                        MatView_MPIAIJ,
2650                                        0,
2651                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2652                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2653                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2654                                        0,
2655                                        0,
2656                                        0,
2657                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2658                                        MatGetRowMinAbs_MPIAIJ,
2659                                        0,
2660                                        0,
2661                                        0,
2662                                        0,
2663                                 /*75*/ MatFDColoringApply_AIJ,
2664                                        MatSetFromOptions_MPIAIJ,
2665                                        0,
2666                                        0,
2667                                        MatFindZeroDiagonals_MPIAIJ,
2668                                 /*80*/ 0,
2669                                        0,
2670                                        0,
2671                                 /*83*/ MatLoad_MPIAIJ,
2672                                        MatIsSymmetric_MPIAIJ,
2673                                        0,
2674                                        0,
2675                                        0,
2676                                        0,
2677                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2678                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2679                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2680                                        MatPtAP_MPIAIJ_MPIAIJ,
2681                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2682                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2683                                        0,
2684                                        0,
2685                                        0,
2686                                        0,
2687                                 /*99*/ 0,
2688                                        0,
2689                                        0,
2690                                        MatConjugate_MPIAIJ,
2691                                        0,
2692                                 /*104*/MatSetValuesRow_MPIAIJ,
2693                                        MatRealPart_MPIAIJ,
2694                                        MatImaginaryPart_MPIAIJ,
2695                                        0,
2696                                        0,
2697                                 /*109*/0,
2698                                        0,
2699                                        MatGetRowMin_MPIAIJ,
2700                                        0,
2701                                        MatMissingDiagonal_MPIAIJ,
2702                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2703                                        0,
2704                                        MatGetGhosts_MPIAIJ,
2705                                        0,
2706                                        0,
2707                                 /*119*/0,
2708                                        0,
2709                                        0,
2710                                        0,
2711                                        MatGetMultiProcBlock_MPIAIJ,
2712                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2713                                        MatGetColumnNorms_MPIAIJ,
2714                                        MatInvertBlockDiagonal_MPIAIJ,
2715                                        0,
2716                                        MatCreateSubMatricesMPI_MPIAIJ,
2717                                 /*129*/0,
2718                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2719                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2720                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2721                                        0,
2722                                 /*134*/0,
2723                                        0,
2724                                        MatRARt_MPIAIJ_MPIAIJ,
2725                                        0,
2726                                        0,
2727                                 /*139*/MatSetBlockSizes_MPIAIJ,
2728                                        0,
2729                                        0,
2730                                        MatFDColoringSetUp_MPIXAIJ,
2731                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2732                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2733 };
2734 
2735 /* ----------------------------------------------------------------------------------------*/
2736 
2737 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2738 {
2739   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2740   PetscErrorCode ierr;
2741 
2742   PetscFunctionBegin;
2743   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2744   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2745   PetscFunctionReturn(0);
2746 }
2747 
2748 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2749 {
2750   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2751   PetscErrorCode ierr;
2752 
2753   PetscFunctionBegin;
2754   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2755   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2756   PetscFunctionReturn(0);
2757 }
2758 
2759 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2760 {
2761   Mat_MPIAIJ     *b;
2762   PetscErrorCode ierr;
2763 
2764   PetscFunctionBegin;
2765   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2766   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2767   b = (Mat_MPIAIJ*)B->data;
2768 
2769 #if defined(PETSC_USE_CTABLE)
2770   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2771 #else
2772   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2773 #endif
2774   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2775   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2776   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2777 
2778   /* Because the B will have been resized we simply destroy it and create a new one each time */
2779   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2780   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2781   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2782   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2783   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2784   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2785 
2786   if (!B->preallocated) {
2787     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2788     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2789     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2790     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2791     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2792   }
2793 
2794   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2795   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2796   B->preallocated  = PETSC_TRUE;
2797   B->was_assembled = PETSC_FALSE;
2798   B->assembled     = PETSC_FALSE;;
2799   PetscFunctionReturn(0);
2800 }
2801 
2802 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2803 {
2804   Mat_MPIAIJ     *b;
2805   PetscErrorCode ierr;
2806 
2807   PetscFunctionBegin;
2808   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2809   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2810   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2811   b = (Mat_MPIAIJ*)B->data;
2812 
2813 #if defined(PETSC_USE_CTABLE)
2814   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2815 #else
2816   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2817 #endif
2818   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2819   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2820   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2821 
2822   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2823   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2824   B->preallocated  = PETSC_TRUE;
2825   B->was_assembled = PETSC_FALSE;
2826   B->assembled = PETSC_FALSE;
2827   PetscFunctionReturn(0);
2828 }
2829 
2830 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2831 {
2832   Mat            mat;
2833   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2834   PetscErrorCode ierr;
2835 
2836   PetscFunctionBegin;
2837   *newmat = 0;
2838   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2839   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2840   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2841   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2842   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2843   a       = (Mat_MPIAIJ*)mat->data;
2844 
2845   mat->factortype   = matin->factortype;
2846   mat->assembled    = PETSC_TRUE;
2847   mat->insertmode   = NOT_SET_VALUES;
2848   mat->preallocated = PETSC_TRUE;
2849 
2850   a->size         = oldmat->size;
2851   a->rank         = oldmat->rank;
2852   a->donotstash   = oldmat->donotstash;
2853   a->roworiented  = oldmat->roworiented;
2854   a->rowindices   = 0;
2855   a->rowvalues    = 0;
2856   a->getrowactive = PETSC_FALSE;
2857 
2858   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2859   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2860 
2861   if (oldmat->colmap) {
2862 #if defined(PETSC_USE_CTABLE)
2863     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2864 #else
2865     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2866     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2867     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2868 #endif
2869   } else a->colmap = 0;
2870   if (oldmat->garray) {
2871     PetscInt len;
2872     len  = oldmat->B->cmap->n;
2873     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2874     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2875     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2876   } else a->garray = 0;
2877 
2878   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2879   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2880   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2881   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2882 
2883   if (oldmat->Mvctx_mpi1) {
2884     ierr    = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2885     ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2886   }
2887 
2888   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2889   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2890   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2891   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2892   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2893   *newmat = mat;
2894   PetscFunctionReturn(0);
2895 }
2896 
2897 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2898 {
2899   PetscScalar    *vals,*svals;
2900   MPI_Comm       comm;
2901   PetscErrorCode ierr;
2902   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2903   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2904   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2905   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2906   PetscInt       cend,cstart,n,*rowners;
2907   int            fd;
2908   PetscInt       bs = newMat->rmap->bs;
2909 
2910   PetscFunctionBegin;
2911   /* force binary viewer to load .info file if it has not yet done so */
2912   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2913   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2914   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2915   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2916   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2917   if (!rank) {
2918     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2919     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2920     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ");
2921   }
2922 
2923   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2924   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2925   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2926   if (bs < 0) bs = 1;
2927 
2928   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2929   M    = header[1]; N = header[2];
2930 
2931   /* If global sizes are set, check if they are consistent with that given in the file */
2932   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2933   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2934 
2935   /* determine ownership of all (block) rows */
2936   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2937   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2938   else m = newMat->rmap->n; /* Set by user */
2939 
2940   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2941   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2942 
2943   /* First process needs enough room for process with most rows */
2944   if (!rank) {
2945     mmax = rowners[1];
2946     for (i=2; i<=size; i++) {
2947       mmax = PetscMax(mmax, rowners[i]);
2948     }
2949   } else mmax = -1;             /* unused, but compilers complain */
2950 
2951   rowners[0] = 0;
2952   for (i=2; i<=size; i++) {
2953     rowners[i] += rowners[i-1];
2954   }
2955   rstart = rowners[rank];
2956   rend   = rowners[rank+1];
2957 
2958   /* distribute row lengths to all processors */
2959   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2960   if (!rank) {
2961     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2962     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2963     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2964     for (j=0; j<m; j++) {
2965       procsnz[0] += ourlens[j];
2966     }
2967     for (i=1; i<size; i++) {
2968       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2969       /* calculate the number of nonzeros on each processor */
2970       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2971         procsnz[i] += rowlengths[j];
2972       }
2973       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2974     }
2975     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2976   } else {
2977     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2978   }
2979 
2980   if (!rank) {
2981     /* determine max buffer needed and allocate it */
2982     maxnz = 0;
2983     for (i=0; i<size; i++) {
2984       maxnz = PetscMax(maxnz,procsnz[i]);
2985     }
2986     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2987 
2988     /* read in my part of the matrix column indices  */
2989     nz   = procsnz[0];
2990     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2991     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2992 
2993     /* read in every one elses and ship off */
2994     for (i=1; i<size; i++) {
2995       nz   = procsnz[i];
2996       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2997       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2998     }
2999     ierr = PetscFree(cols);CHKERRQ(ierr);
3000   } else {
3001     /* determine buffer space needed for message */
3002     nz = 0;
3003     for (i=0; i<m; i++) {
3004       nz += ourlens[i];
3005     }
3006     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3007 
3008     /* receive message of column indices*/
3009     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3010   }
3011 
3012   /* determine column ownership if matrix is not square */
3013   if (N != M) {
3014     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3015     else n = newMat->cmap->n;
3016     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3017     cstart = cend - n;
3018   } else {
3019     cstart = rstart;
3020     cend   = rend;
3021     n      = cend - cstart;
3022   }
3023 
3024   /* loop over local rows, determining number of off diagonal entries */
3025   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3026   jj   = 0;
3027   for (i=0; i<m; i++) {
3028     for (j=0; j<ourlens[i]; j++) {
3029       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3030       jj++;
3031     }
3032   }
3033 
3034   for (i=0; i<m; i++) {
3035     ourlens[i] -= offlens[i];
3036   }
3037   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3038 
3039   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3040 
3041   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3042 
3043   for (i=0; i<m; i++) {
3044     ourlens[i] += offlens[i];
3045   }
3046 
3047   if (!rank) {
3048     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3049 
3050     /* read in my part of the matrix numerical values  */
3051     nz   = procsnz[0];
3052     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3053 
3054     /* insert into matrix */
3055     jj      = rstart;
3056     smycols = mycols;
3057     svals   = vals;
3058     for (i=0; i<m; i++) {
3059       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3060       smycols += ourlens[i];
3061       svals   += ourlens[i];
3062       jj++;
3063     }
3064 
3065     /* read in other processors and ship out */
3066     for (i=1; i<size; i++) {
3067       nz   = procsnz[i];
3068       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3069       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3070     }
3071     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3072   } else {
3073     /* receive numeric values */
3074     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3075 
3076     /* receive message of values*/
3077     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3078 
3079     /* insert into matrix */
3080     jj      = rstart;
3081     smycols = mycols;
3082     svals   = vals;
3083     for (i=0; i<m; i++) {
3084       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3085       smycols += ourlens[i];
3086       svals   += ourlens[i];
3087       jj++;
3088     }
3089   }
3090   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3091   ierr = PetscFree(vals);CHKERRQ(ierr);
3092   ierr = PetscFree(mycols);CHKERRQ(ierr);
3093   ierr = PetscFree(rowners);CHKERRQ(ierr);
3094   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3095   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3096   PetscFunctionReturn(0);
3097 }
3098 
3099 /* Not scalable because of ISAllGather() unless getting all columns. */
3100 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3101 {
3102   PetscErrorCode ierr;
3103   IS             iscol_local;
3104   PetscBool      isstride;
3105   PetscMPIInt    lisstride=0,gisstride;
3106 
3107   PetscFunctionBegin;
3108   /* check if we are grabbing all columns*/
3109   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3110 
3111   if (isstride) {
3112     PetscInt  start,len,mstart,mlen;
3113     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3114     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3115     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3116     if (mstart == start && mlen-mstart == len) lisstride = 1;
3117   }
3118 
3119   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3120   if (gisstride) {
3121     PetscInt N;
3122     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3123     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3124     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3125     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3126   } else {
3127     PetscInt cbs;
3128     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3129     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3130     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3131   }
3132 
3133   *isseq = iscol_local;
3134   PetscFunctionReturn(0);
3135 }
3136 
3137 /*
3138  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3139  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3140 
3141  Input Parameters:
3142    mat - matrix
3143    isrow - parallel row index set; its local indices are a subset of local columns of mat,
3144            i.e., mat->rstart <= isrow[i] < mat->rend
3145    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3146            i.e., mat->cstart <= iscol[i] < mat->cend
3147  Output Parameter:
3148    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3149    iscol_o - sequential column index set for retrieving mat->B
3150    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3151  */
3152 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3153 {
3154   PetscErrorCode ierr;
3155   Vec            x,cmap;
3156   const PetscInt *is_idx;
3157   PetscScalar    *xarray,*cmaparray;
3158   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3159   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3160   Mat            B=a->B;
3161   Vec            lvec=a->lvec,lcmap;
3162   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3163   MPI_Comm       comm;
3164   VecScatter     Mvctx=a->Mvctx;
3165 
3166   PetscFunctionBegin;
3167   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3168   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3169 
3170   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3171   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3172   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3173   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3174   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3175 
3176   /* Get start indices */
3177   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3178   isstart -= ncols;
3179   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3180 
3181   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3182   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3183   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3184   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3185   for (i=0; i<ncols; i++) {
3186     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3187     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3188     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3189   }
3190   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3191   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3192   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3193 
3194   /* Get iscol_d */
3195   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3196   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3197   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3198 
3199   /* Get isrow_d */
3200   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3201   rstart = mat->rmap->rstart;
3202   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3203   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3204   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3205   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3206 
3207   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3208   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3209   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3210 
3211   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3212   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3213   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3214 
3215   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3216 
3217   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3218   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3219 
3220   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3221   /* off-process column indices */
3222   count = 0;
3223   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3224   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3225 
3226   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3227   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3228   for (i=0; i<Bn; i++) {
3229     if (PetscRealPart(xarray[i]) > -1.0) {
3230       idx[count]     = i;                   /* local column index in off-diagonal part B */
3231       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3232       count++;
3233     }
3234   }
3235   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3236   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3237 
3238   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3239   /* cannot ensure iscol_o has same blocksize as iscol! */
3240 
3241   ierr = PetscFree(idx);CHKERRQ(ierr);
3242   *garray = cmap1;
3243 
3244   ierr = VecDestroy(&x);CHKERRQ(ierr);
3245   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3246   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3247   PetscFunctionReturn(0);
3248 }
3249 
3250 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3251 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3252 {
3253   PetscErrorCode ierr;
3254   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3255   Mat            M = NULL;
3256   MPI_Comm       comm;
3257   IS             iscol_d,isrow_d,iscol_o;
3258   Mat            Asub = NULL,Bsub = NULL;
3259   PetscInt       n;
3260 
3261   PetscFunctionBegin;
3262   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3263 
3264   if (call == MAT_REUSE_MATRIX) {
3265     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3266     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3267     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3268 
3269     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3270     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3271 
3272     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3273     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3274 
3275     /* Update diagonal and off-diagonal portions of submat */
3276     asub = (Mat_MPIAIJ*)(*submat)->data;
3277     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3278     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3279     if (n) {
3280       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3281     }
3282     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3283     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3284 
3285   } else { /* call == MAT_INITIAL_MATRIX) */
3286     const PetscInt *garray;
3287     PetscInt        BsubN;
3288 
3289     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3290     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3291 
3292     /* Create local submatrices Asub and Bsub */
3293     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3294     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3295 
3296     /* Create submatrix M */
3297     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3298 
3299     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3300     asub = (Mat_MPIAIJ*)M->data;
3301 
3302     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3303     n = asub->B->cmap->N;
3304     if (BsubN > n) {
3305       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3306       const PetscInt *idx;
3307       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3308       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3309 
3310       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3311       j = 0;
3312       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3313       for (i=0; i<n; i++) {
3314         if (j >= BsubN) break;
3315         while (subgarray[i] > garray[j]) j++;
3316 
3317         if (subgarray[i] == garray[j]) {
3318           idx_new[i] = idx[j++];
3319         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]);
3320       }
3321       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3322 
3323       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3324       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3325 
3326     } else if (BsubN < n) {
3327       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N);
3328     }
3329 
3330     ierr = PetscFree(garray);CHKERRQ(ierr);
3331     *submat = M;
3332 
3333     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3334     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3335     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3336 
3337     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3338     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3339 
3340     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3341     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3342   }
3343   PetscFunctionReturn(0);
3344 }
3345 
3346 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3347 {
3348   PetscErrorCode ierr;
3349   IS             iscol_local=NULL,isrow_d;
3350   PetscInt       csize;
3351   PetscInt       n,i,j,start,end;
3352   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3353   MPI_Comm       comm;
3354 
3355   PetscFunctionBegin;
3356   /* If isrow has same processor distribution as mat,
3357      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3358   if (call == MAT_REUSE_MATRIX) {
3359     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3360     if (isrow_d) {
3361       sameRowDist  = PETSC_TRUE;
3362       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3363     } else {
3364       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3365       if (iscol_local) {
3366         sameRowDist  = PETSC_TRUE;
3367         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3368       }
3369     }
3370   } else {
3371     /* Check if isrow has same processor distribution as mat */
3372     sameDist[0] = PETSC_FALSE;
3373     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3374     if (!n) {
3375       sameDist[0] = PETSC_TRUE;
3376     } else {
3377       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3378       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3379       if (i >= start && j < end) {
3380         sameDist[0] = PETSC_TRUE;
3381       }
3382     }
3383 
3384     /* Check if iscol has same processor distribution as mat */
3385     sameDist[1] = PETSC_FALSE;
3386     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3387     if (!n) {
3388       sameDist[1] = PETSC_TRUE;
3389     } else {
3390       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3391       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3392       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3393     }
3394 
3395     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3396     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3397     sameRowDist = tsameDist[0];
3398   }
3399 
3400   if (sameRowDist) {
3401     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3402       /* isrow and iscol have same processor distribution as mat */
3403       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3404       PetscFunctionReturn(0);
3405     } else { /* sameRowDist */
3406       /* isrow has same processor distribution as mat */
3407       if (call == MAT_INITIAL_MATRIX) {
3408         PetscBool sorted;
3409         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3410         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3411         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3412         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i);
3413 
3414         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3415         if (sorted) {
3416           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3417           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3418           PetscFunctionReturn(0);
3419         }
3420       } else { /* call == MAT_REUSE_MATRIX */
3421         IS    iscol_sub;
3422         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3423         if (iscol_sub) {
3424           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3425           PetscFunctionReturn(0);
3426         }
3427       }
3428     }
3429   }
3430 
3431   /* General case: iscol -> iscol_local which has global size of iscol */
3432   if (call == MAT_REUSE_MATRIX) {
3433     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3434     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3435   } else {
3436     if (!iscol_local) {
3437       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3438     }
3439   }
3440 
3441   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3442   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3443 
3444   if (call == MAT_INITIAL_MATRIX) {
3445     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3446     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3447   }
3448   PetscFunctionReturn(0);
3449 }
3450 
3451 /*@C
3452      MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3453          and "off-diagonal" part of the matrix in CSR format.
3454 
3455    Collective on MPI_Comm
3456 
3457    Input Parameters:
3458 +  comm - MPI communicator
3459 .  A - "diagonal" portion of matrix
3460 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3461 -  garray - global index of B columns
3462 
3463    Output Parameter:
3464 .   mat - the matrix, with input A as its local diagonal matrix
3465    Level: advanced
3466 
3467    Notes:
3468        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3469        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3470 
3471 .seealso: MatCreateMPIAIJWithSplitArrays()
3472 @*/
3473 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3474 {
3475   PetscErrorCode ierr;
3476   Mat_MPIAIJ     *maij;
3477   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3478   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3479   PetscScalar    *oa=b->a;
3480   Mat            Bnew;
3481   PetscInt       m,n,N;
3482 
3483   PetscFunctionBegin;
3484   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3485   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3486   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3487   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3488   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3489   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3490 
3491   /* Get global columns of mat */
3492   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3493 
3494   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3495   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3496   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3497   maij = (Mat_MPIAIJ*)(*mat)->data;
3498 
3499   (*mat)->preallocated = PETSC_TRUE;
3500 
3501   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3502   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3503 
3504   /* Set A as diagonal portion of *mat */
3505   maij->A = A;
3506 
3507   nz = oi[m];
3508   for (i=0; i<nz; i++) {
3509     col   = oj[i];
3510     oj[i] = garray[col];
3511   }
3512 
3513    /* Set Bnew as off-diagonal portion of *mat */
3514   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3515   bnew        = (Mat_SeqAIJ*)Bnew->data;
3516   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3517   maij->B     = Bnew;
3518 
3519   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N);
3520 
3521   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3522   b->free_a       = PETSC_FALSE;
3523   b->free_ij      = PETSC_FALSE;
3524   ierr = MatDestroy(&B);CHKERRQ(ierr);
3525 
3526   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3527   bnew->free_a       = PETSC_TRUE;
3528   bnew->free_ij      = PETSC_TRUE;
3529 
3530   /* condense columns of maij->B */
3531   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3532   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3533   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3534   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3535   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3536   PetscFunctionReturn(0);
3537 }
3538 
3539 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3540 
3541 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3542 {
3543   PetscErrorCode ierr;
3544   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3545   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3546   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3547   Mat            M,Msub,B=a->B;
3548   MatScalar      *aa;
3549   Mat_SeqAIJ     *aij;
3550   PetscInt       *garray = a->garray,*colsub,Ncols;
3551   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3552   IS             iscol_sub,iscmap;
3553   const PetscInt *is_idx,*cmap;
3554   PetscBool      allcolumns=PETSC_FALSE;
3555   MPI_Comm       comm;
3556 
3557   PetscFunctionBegin;
3558   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3559 
3560   if (call == MAT_REUSE_MATRIX) {
3561     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3562     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3563     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3564 
3565     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3566     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3567 
3568     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3569     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3570 
3571     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3572 
3573   } else { /* call == MAT_INITIAL_MATRIX) */
3574     PetscBool flg;
3575 
3576     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3577     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3578 
3579     /* (1) iscol -> nonscalable iscol_local */
3580     /* Check for special case: each processor gets entire matrix columns */
3581     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3582     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3583     if (allcolumns) {
3584       iscol_sub = iscol_local;
3585       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3586       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3587 
3588     } else {
3589       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3590       PetscInt *idx,*cmap1,k;
3591       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3592       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3593       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3594       count = 0;
3595       k     = 0;
3596       for (i=0; i<Ncols; i++) {
3597         j = is_idx[i];
3598         if (j >= cstart && j < cend) {
3599           /* diagonal part of mat */
3600           idx[count]     = j;
3601           cmap1[count++] = i; /* column index in submat */
3602         } else if (Bn) {
3603           /* off-diagonal part of mat */
3604           if (j == garray[k]) {
3605             idx[count]     = j;
3606             cmap1[count++] = i;  /* column index in submat */
3607           } else if (j > garray[k]) {
3608             while (j > garray[k] && k < Bn-1) k++;
3609             if (j == garray[k]) {
3610               idx[count]     = j;
3611               cmap1[count++] = i; /* column index in submat */
3612             }
3613           }
3614         }
3615       }
3616       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3617 
3618       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3619       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3620       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3621 
3622       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3623     }
3624 
3625     /* (3) Create sequential Msub */
3626     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3627   }
3628 
3629   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3630   aij  = (Mat_SeqAIJ*)(Msub)->data;
3631   ii   = aij->i;
3632   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3633 
3634   /*
3635       m - number of local rows
3636       Ncols - number of columns (same on all processors)
3637       rstart - first row in new global matrix generated
3638   */
3639   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3640 
3641   if (call == MAT_INITIAL_MATRIX) {
3642     /* (4) Create parallel newmat */
3643     PetscMPIInt    rank,size;
3644     PetscInt       csize;
3645 
3646     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3647     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3648 
3649     /*
3650         Determine the number of non-zeros in the diagonal and off-diagonal
3651         portions of the matrix in order to do correct preallocation
3652     */
3653 
3654     /* first get start and end of "diagonal" columns */
3655     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3656     if (csize == PETSC_DECIDE) {
3657       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3658       if (mglobal == Ncols) { /* square matrix */
3659         nlocal = m;
3660       } else {
3661         nlocal = Ncols/size + ((Ncols % size) > rank);
3662       }
3663     } else {
3664       nlocal = csize;
3665     }
3666     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3667     rstart = rend - nlocal;
3668     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3669 
3670     /* next, compute all the lengths */
3671     jj    = aij->j;
3672     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3673     olens = dlens + m;
3674     for (i=0; i<m; i++) {
3675       jend = ii[i+1] - ii[i];
3676       olen = 0;
3677       dlen = 0;
3678       for (j=0; j<jend; j++) {
3679         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3680         else dlen++;
3681         jj++;
3682       }
3683       olens[i] = olen;
3684       dlens[i] = dlen;
3685     }
3686 
3687     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3688     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3689 
3690     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3691     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3692     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3693     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3694     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3695     ierr = PetscFree(dlens);CHKERRQ(ierr);
3696 
3697   } else { /* call == MAT_REUSE_MATRIX */
3698     M    = *newmat;
3699     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3700     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3701     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3702     /*
3703          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3704        rather than the slower MatSetValues().
3705     */
3706     M->was_assembled = PETSC_TRUE;
3707     M->assembled     = PETSC_FALSE;
3708   }
3709 
3710   /* (5) Set values of Msub to *newmat */
3711   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3712   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3713 
3714   jj   = aij->j;
3715   aa   = aij->a;
3716   for (i=0; i<m; i++) {
3717     row = rstart + i;
3718     nz  = ii[i+1] - ii[i];
3719     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3720     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3721     jj += nz; aa += nz;
3722   }
3723   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3724 
3725   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3726   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3727 
3728   ierr = PetscFree(colsub);CHKERRQ(ierr);
3729 
3730   /* save Msub, iscol_sub and iscmap used in processor for next request */
3731   if (call ==  MAT_INITIAL_MATRIX) {
3732     *newmat = M;
3733     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3734     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3735 
3736     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3737     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3738 
3739     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3740     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3741 
3742     if (iscol_local) {
3743       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3744       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3745     }
3746   }
3747   PetscFunctionReturn(0);
3748 }
3749 
3750 /*
3751     Not great since it makes two copies of the submatrix, first an SeqAIJ
3752   in local and then by concatenating the local matrices the end result.
3753   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3754 
3755   Note: This requires a sequential iscol with all indices.
3756 */
3757 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3758 {
3759   PetscErrorCode ierr;
3760   PetscMPIInt    rank,size;
3761   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3762   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3763   Mat            M,Mreuse;
3764   MatScalar      *aa,*vwork;
3765   MPI_Comm       comm;
3766   Mat_SeqAIJ     *aij;
3767   PetscBool      colflag,allcolumns=PETSC_FALSE;
3768 
3769   PetscFunctionBegin;
3770   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3771   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3772   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3773 
3774   /* Check for special case: each processor gets entire matrix columns */
3775   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3776   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3777   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3778 
3779   if (call ==  MAT_REUSE_MATRIX) {
3780     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3781     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3782     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3783   } else {
3784     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3785   }
3786 
3787   /*
3788       m - number of local rows
3789       n - number of columns (same on all processors)
3790       rstart - first row in new global matrix generated
3791   */
3792   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3793   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3794   if (call == MAT_INITIAL_MATRIX) {
3795     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3796     ii  = aij->i;
3797     jj  = aij->j;
3798 
3799     /*
3800         Determine the number of non-zeros in the diagonal and off-diagonal
3801         portions of the matrix in order to do correct preallocation
3802     */
3803 
3804     /* first get start and end of "diagonal" columns */
3805     if (csize == PETSC_DECIDE) {
3806       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3807       if (mglobal == n) { /* square matrix */
3808         nlocal = m;
3809       } else {
3810         nlocal = n/size + ((n % size) > rank);
3811       }
3812     } else {
3813       nlocal = csize;
3814     }
3815     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3816     rstart = rend - nlocal;
3817     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3818 
3819     /* next, compute all the lengths */
3820     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3821     olens = dlens + m;
3822     for (i=0; i<m; i++) {
3823       jend = ii[i+1] - ii[i];
3824       olen = 0;
3825       dlen = 0;
3826       for (j=0; j<jend; j++) {
3827         if (*jj < rstart || *jj >= rend) olen++;
3828         else dlen++;
3829         jj++;
3830       }
3831       olens[i] = olen;
3832       dlens[i] = dlen;
3833     }
3834     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3835     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3836     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3837     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3838     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3839     ierr = PetscFree(dlens);CHKERRQ(ierr);
3840   } else {
3841     PetscInt ml,nl;
3842 
3843     M    = *newmat;
3844     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3845     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3846     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3847     /*
3848          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3849        rather than the slower MatSetValues().
3850     */
3851     M->was_assembled = PETSC_TRUE;
3852     M->assembled     = PETSC_FALSE;
3853   }
3854   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3855   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3856   ii   = aij->i;
3857   jj   = aij->j;
3858   aa   = aij->a;
3859   for (i=0; i<m; i++) {
3860     row   = rstart + i;
3861     nz    = ii[i+1] - ii[i];
3862     cwork = jj;     jj += nz;
3863     vwork = aa;     aa += nz;
3864     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3865   }
3866 
3867   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3868   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3869   *newmat = M;
3870 
3871   /* save submatrix used in processor for next request */
3872   if (call ==  MAT_INITIAL_MATRIX) {
3873     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3874     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3875   }
3876   PetscFunctionReturn(0);
3877 }
3878 
3879 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3880 {
3881   PetscInt       m,cstart, cend,j,nnz,i,d;
3882   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3883   const PetscInt *JJ;
3884   PetscScalar    *values;
3885   PetscErrorCode ierr;
3886   PetscBool      nooffprocentries;
3887 
3888   PetscFunctionBegin;
3889   if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3890 
3891   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3892   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3893   m      = B->rmap->n;
3894   cstart = B->cmap->rstart;
3895   cend   = B->cmap->rend;
3896   rstart = B->rmap->rstart;
3897 
3898   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3899 
3900 #if defined(PETSC_USE_DEBUG)
3901   for (i=0; i<m; i++) {
3902     nnz = Ii[i+1]- Ii[i];
3903     JJ  = J + Ii[i];
3904     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3905     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]);
3906     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3907   }
3908 #endif
3909 
3910   for (i=0; i<m; i++) {
3911     nnz     = Ii[i+1]- Ii[i];
3912     JJ      = J + Ii[i];
3913     nnz_max = PetscMax(nnz_max,nnz);
3914     d       = 0;
3915     for (j=0; j<nnz; j++) {
3916       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3917     }
3918     d_nnz[i] = d;
3919     o_nnz[i] = nnz - d;
3920   }
3921   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3922   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3923 
3924   if (v) values = (PetscScalar*)v;
3925   else {
3926     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3927   }
3928 
3929   for (i=0; i<m; i++) {
3930     ii   = i + rstart;
3931     nnz  = Ii[i+1]- Ii[i];
3932     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3933   }
3934   nooffprocentries    = B->nooffprocentries;
3935   B->nooffprocentries = PETSC_TRUE;
3936   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3937   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3938   B->nooffprocentries = nooffprocentries;
3939 
3940   if (!v) {
3941     ierr = PetscFree(values);CHKERRQ(ierr);
3942   }
3943   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3944   PetscFunctionReturn(0);
3945 }
3946 
3947 /*@
3948    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3949    (the default parallel PETSc format).
3950 
3951    Collective on MPI_Comm
3952 
3953    Input Parameters:
3954 +  B - the matrix
3955 .  i - the indices into j for the start of each local row (starts with zero)
3956 .  j - the column indices for each local row (starts with zero)
3957 -  v - optional values in the matrix
3958 
3959    Level: developer
3960 
3961    Notes:
3962        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3963      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3964      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3965 
3966        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3967 
3968        The format which is used for the sparse matrix input, is equivalent to a
3969     row-major ordering.. i.e for the following matrix, the input data expected is
3970     as shown
3971 
3972 $        1 0 0
3973 $        2 0 3     P0
3974 $       -------
3975 $        4 5 6     P1
3976 $
3977 $     Process0 [P0]: rows_owned=[0,1]
3978 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3979 $        j =  {0,0,2}  [size = 3]
3980 $        v =  {1,2,3}  [size = 3]
3981 $
3982 $     Process1 [P1]: rows_owned=[2]
3983 $        i =  {0,3}    [size = nrow+1  = 1+1]
3984 $        j =  {0,1,2}  [size = 3]
3985 $        v =  {4,5,6}  [size = 3]
3986 
3987 .keywords: matrix, aij, compressed row, sparse, parallel
3988 
3989 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3990           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3991 @*/
3992 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3993 {
3994   PetscErrorCode ierr;
3995 
3996   PetscFunctionBegin;
3997   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3998   PetscFunctionReturn(0);
3999 }
4000 
4001 /*@C
4002    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4003    (the default parallel PETSc format).  For good matrix assembly performance
4004    the user should preallocate the matrix storage by setting the parameters
4005    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4006    performance can be increased by more than a factor of 50.
4007 
4008    Collective on MPI_Comm
4009 
4010    Input Parameters:
4011 +  B - the matrix
4012 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4013            (same value is used for all local rows)
4014 .  d_nnz - array containing the number of nonzeros in the various rows of the
4015            DIAGONAL portion of the local submatrix (possibly different for each row)
4016            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4017            The size of this array is equal to the number of local rows, i.e 'm'.
4018            For matrices that will be factored, you must leave room for (and set)
4019            the diagonal entry even if it is zero.
4020 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4021            submatrix (same value is used for all local rows).
4022 -  o_nnz - array containing the number of nonzeros in the various rows of the
4023            OFF-DIAGONAL portion of the local submatrix (possibly different for
4024            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4025            structure. The size of this array is equal to the number
4026            of local rows, i.e 'm'.
4027 
4028    If the *_nnz parameter is given then the *_nz parameter is ignored
4029 
4030    The AIJ format (also called the Yale sparse matrix format or
4031    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4032    storage.  The stored row and column indices begin with zero.
4033    See Users-Manual: ch_mat for details.
4034 
4035    The parallel matrix is partitioned such that the first m0 rows belong to
4036    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4037    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4038 
4039    The DIAGONAL portion of the local submatrix of a processor can be defined
4040    as the submatrix which is obtained by extraction the part corresponding to
4041    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4042    first row that belongs to the processor, r2 is the last row belonging to
4043    the this processor, and c1-c2 is range of indices of the local part of a
4044    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4045    common case of a square matrix, the row and column ranges are the same and
4046    the DIAGONAL part is also square. The remaining portion of the local
4047    submatrix (mxN) constitute the OFF-DIAGONAL portion.
4048 
4049    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4050 
4051    You can call MatGetInfo() to get information on how effective the preallocation was;
4052    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4053    You can also run with the option -info and look for messages with the string
4054    malloc in them to see if additional memory allocation was needed.
4055 
4056    Example usage:
4057 
4058    Consider the following 8x8 matrix with 34 non-zero values, that is
4059    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4060    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4061    as follows:
4062 
4063 .vb
4064             1  2  0  |  0  3  0  |  0  4
4065     Proc0   0  5  6  |  7  0  0  |  8  0
4066             9  0 10  | 11  0  0  | 12  0
4067     -------------------------------------
4068            13  0 14  | 15 16 17  |  0  0
4069     Proc1   0 18  0  | 19 20 21  |  0  0
4070             0  0  0  | 22 23  0  | 24  0
4071     -------------------------------------
4072     Proc2  25 26 27  |  0  0 28  | 29  0
4073            30  0  0  | 31 32 33  |  0 34
4074 .ve
4075 
4076    This can be represented as a collection of submatrices as:
4077 
4078 .vb
4079       A B C
4080       D E F
4081       G H I
4082 .ve
4083 
4084    Where the submatrices A,B,C are owned by proc0, D,E,F are
4085    owned by proc1, G,H,I are owned by proc2.
4086 
4087    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4088    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4089    The 'M','N' parameters are 8,8, and have the same values on all procs.
4090 
4091    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4092    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4093    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4094    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4095    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4096    matrix, ans [DF] as another SeqAIJ matrix.
4097 
4098    When d_nz, o_nz parameters are specified, d_nz storage elements are
4099    allocated for every row of the local diagonal submatrix, and o_nz
4100    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4101    One way to choose d_nz and o_nz is to use the max nonzerors per local
4102    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4103    In this case, the values of d_nz,o_nz are:
4104 .vb
4105      proc0 : dnz = 2, o_nz = 2
4106      proc1 : dnz = 3, o_nz = 2
4107      proc2 : dnz = 1, o_nz = 4
4108 .ve
4109    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4110    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4111    for proc3. i.e we are using 12+15+10=37 storage locations to store
4112    34 values.
4113 
4114    When d_nnz, o_nnz parameters are specified, the storage is specified
4115    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4116    In the above case the values for d_nnz,o_nnz are:
4117 .vb
4118      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4119      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4120      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4121 .ve
4122    Here the space allocated is sum of all the above values i.e 34, and
4123    hence pre-allocation is perfect.
4124 
4125    Level: intermediate
4126 
4127 .keywords: matrix, aij, compressed row, sparse, parallel
4128 
4129 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4130           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4131 @*/
4132 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4133 {
4134   PetscErrorCode ierr;
4135 
4136   PetscFunctionBegin;
4137   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4138   PetscValidType(B,1);
4139   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4140   PetscFunctionReturn(0);
4141 }
4142 
4143 /*@
4144      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4145          CSR format the local rows.
4146 
4147    Collective on MPI_Comm
4148 
4149    Input Parameters:
4150 +  comm - MPI communicator
4151 .  m - number of local rows (Cannot be PETSC_DECIDE)
4152 .  n - This value should be the same as the local size used in creating the
4153        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4154        calculated if N is given) For square matrices n is almost always m.
4155 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4156 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4157 .   i - row indices
4158 .   j - column indices
4159 -   a - matrix values
4160 
4161    Output Parameter:
4162 .   mat - the matrix
4163 
4164    Level: intermediate
4165 
4166    Notes:
4167        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4168      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4169      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4170 
4171        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4172 
4173        The format which is used for the sparse matrix input, is equivalent to a
4174     row-major ordering.. i.e for the following matrix, the input data expected is
4175     as shown
4176 
4177 $        1 0 0
4178 $        2 0 3     P0
4179 $       -------
4180 $        4 5 6     P1
4181 $
4182 $     Process0 [P0]: rows_owned=[0,1]
4183 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4184 $        j =  {0,0,2}  [size = 3]
4185 $        v =  {1,2,3}  [size = 3]
4186 $
4187 $     Process1 [P1]: rows_owned=[2]
4188 $        i =  {0,3}    [size = nrow+1  = 1+1]
4189 $        j =  {0,1,2}  [size = 3]
4190 $        v =  {4,5,6}  [size = 3]
4191 
4192 .keywords: matrix, aij, compressed row, sparse, parallel
4193 
4194 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4195           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4196 @*/
4197 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4198 {
4199   PetscErrorCode ierr;
4200 
4201   PetscFunctionBegin;
4202   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4203   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4204   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4205   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4206   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4207   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4208   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4209   PetscFunctionReturn(0);
4210 }
4211 
4212 /*@C
4213    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4214    (the default parallel PETSc format).  For good matrix assembly performance
4215    the user should preallocate the matrix storage by setting the parameters
4216    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4217    performance can be increased by more than a factor of 50.
4218 
4219    Collective on MPI_Comm
4220 
4221    Input Parameters:
4222 +  comm - MPI communicator
4223 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4224            This value should be the same as the local size used in creating the
4225            y vector for the matrix-vector product y = Ax.
4226 .  n - This value should be the same as the local size used in creating the
4227        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4228        calculated if N is given) For square matrices n is almost always m.
4229 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4230 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4231 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4232            (same value is used for all local rows)
4233 .  d_nnz - array containing the number of nonzeros in the various rows of the
4234            DIAGONAL portion of the local submatrix (possibly different for each row)
4235            or NULL, if d_nz is used to specify the nonzero structure.
4236            The size of this array is equal to the number of local rows, i.e 'm'.
4237 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4238            submatrix (same value is used for all local rows).
4239 -  o_nnz - array containing the number of nonzeros in the various rows of the
4240            OFF-DIAGONAL portion of the local submatrix (possibly different for
4241            each row) or NULL, if o_nz is used to specify the nonzero
4242            structure. The size of this array is equal to the number
4243            of local rows, i.e 'm'.
4244 
4245    Output Parameter:
4246 .  A - the matrix
4247 
4248    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4249    MatXXXXSetPreallocation() paradgm instead of this routine directly.
4250    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4251 
4252    Notes:
4253    If the *_nnz parameter is given then the *_nz parameter is ignored
4254 
4255    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4256    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4257    storage requirements for this matrix.
4258 
4259    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4260    processor than it must be used on all processors that share the object for
4261    that argument.
4262 
4263    The user MUST specify either the local or global matrix dimensions
4264    (possibly both).
4265 
4266    The parallel matrix is partitioned across processors such that the
4267    first m0 rows belong to process 0, the next m1 rows belong to
4268    process 1, the next m2 rows belong to process 2 etc.. where
4269    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4270    values corresponding to [m x N] submatrix.
4271 
4272    The columns are logically partitioned with the n0 columns belonging
4273    to 0th partition, the next n1 columns belonging to the next
4274    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4275 
4276    The DIAGONAL portion of the local submatrix on any given processor
4277    is the submatrix corresponding to the rows and columns m,n
4278    corresponding to the given processor. i.e diagonal matrix on
4279    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4280    etc. The remaining portion of the local submatrix [m x (N-n)]
4281    constitute the OFF-DIAGONAL portion. The example below better
4282    illustrates this concept.
4283 
4284    For a square global matrix we define each processor's diagonal portion
4285    to be its local rows and the corresponding columns (a square submatrix);
4286    each processor's off-diagonal portion encompasses the remainder of the
4287    local matrix (a rectangular submatrix).
4288 
4289    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4290 
4291    When calling this routine with a single process communicator, a matrix of
4292    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4293    type of communicator, use the construction mechanism
4294 .vb
4295      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4296 .ve
4297 
4298 $     MatCreate(...,&A);
4299 $     MatSetType(A,MATMPIAIJ);
4300 $     MatSetSizes(A, m,n,M,N);
4301 $     MatMPIAIJSetPreallocation(A,...);
4302 
4303    By default, this format uses inodes (identical nodes) when possible.
4304    We search for consecutive rows with the same nonzero structure, thereby
4305    reusing matrix information to achieve increased efficiency.
4306 
4307    Options Database Keys:
4308 +  -mat_no_inode  - Do not use inodes
4309 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4310 
4311 
4312 
4313    Example usage:
4314 
4315    Consider the following 8x8 matrix with 34 non-zero values, that is
4316    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4317    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4318    as follows
4319 
4320 .vb
4321             1  2  0  |  0  3  0  |  0  4
4322     Proc0   0  5  6  |  7  0  0  |  8  0
4323             9  0 10  | 11  0  0  | 12  0
4324     -------------------------------------
4325            13  0 14  | 15 16 17  |  0  0
4326     Proc1   0 18  0  | 19 20 21  |  0  0
4327             0  0  0  | 22 23  0  | 24  0
4328     -------------------------------------
4329     Proc2  25 26 27  |  0  0 28  | 29  0
4330            30  0  0  | 31 32 33  |  0 34
4331 .ve
4332 
4333    This can be represented as a collection of submatrices as
4334 
4335 .vb
4336       A B C
4337       D E F
4338       G H I
4339 .ve
4340 
4341    Where the submatrices A,B,C are owned by proc0, D,E,F are
4342    owned by proc1, G,H,I are owned by proc2.
4343 
4344    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4345    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4346    The 'M','N' parameters are 8,8, and have the same values on all procs.
4347 
4348    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4349    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4350    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4351    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4352    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4353    matrix, ans [DF] as another SeqAIJ matrix.
4354 
4355    When d_nz, o_nz parameters are specified, d_nz storage elements are
4356    allocated for every row of the local diagonal submatrix, and o_nz
4357    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4358    One way to choose d_nz and o_nz is to use the max nonzerors per local
4359    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4360    In this case, the values of d_nz,o_nz are
4361 .vb
4362      proc0 : dnz = 2, o_nz = 2
4363      proc1 : dnz = 3, o_nz = 2
4364      proc2 : dnz = 1, o_nz = 4
4365 .ve
4366    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4367    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4368    for proc3. i.e we are using 12+15+10=37 storage locations to store
4369    34 values.
4370 
4371    When d_nnz, o_nnz parameters are specified, the storage is specified
4372    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4373    In the above case the values for d_nnz,o_nnz are
4374 .vb
4375      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4376      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4377      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4378 .ve
4379    Here the space allocated is sum of all the above values i.e 34, and
4380    hence pre-allocation is perfect.
4381 
4382    Level: intermediate
4383 
4384 .keywords: matrix, aij, compressed row, sparse, parallel
4385 
4386 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4387           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4388 @*/
4389 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4390 {
4391   PetscErrorCode ierr;
4392   PetscMPIInt    size;
4393 
4394   PetscFunctionBegin;
4395   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4396   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4397   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4398   if (size > 1) {
4399     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4400     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4401   } else {
4402     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4403     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4404   }
4405   PetscFunctionReturn(0);
4406 }
4407 
4408 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4409 {
4410   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4411   PetscBool      flg;
4412   PetscErrorCode ierr;
4413 
4414   PetscFunctionBegin;
4415   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
4416   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4417   if (Ad)     *Ad     = a->A;
4418   if (Ao)     *Ao     = a->B;
4419   if (colmap) *colmap = a->garray;
4420   PetscFunctionReturn(0);
4421 }
4422 
4423 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4424 {
4425   PetscErrorCode ierr;
4426   PetscInt       m,N,i,rstart,nnz,Ii;
4427   PetscInt       *indx;
4428   PetscScalar    *values;
4429 
4430   PetscFunctionBegin;
4431   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4432   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4433     PetscInt       *dnz,*onz,sum,bs,cbs;
4434 
4435     if (n == PETSC_DECIDE) {
4436       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4437     }
4438     /* Check sum(n) = N */
4439     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4440     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4441 
4442     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4443     rstart -= m;
4444 
4445     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4446     for (i=0; i<m; i++) {
4447       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4448       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4449       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4450     }
4451 
4452     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4453     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4454     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4455     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4456     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4457     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4458     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4459     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4460   }
4461 
4462   /* numeric phase */
4463   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4464   for (i=0; i<m; i++) {
4465     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4466     Ii   = i + rstart;
4467     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4468     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4469   }
4470   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4471   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4472   PetscFunctionReturn(0);
4473 }
4474 
4475 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4476 {
4477   PetscErrorCode    ierr;
4478   PetscMPIInt       rank;
4479   PetscInt          m,N,i,rstart,nnz;
4480   size_t            len;
4481   const PetscInt    *indx;
4482   PetscViewer       out;
4483   char              *name;
4484   Mat               B;
4485   const PetscScalar *values;
4486 
4487   PetscFunctionBegin;
4488   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4489   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4490   /* Should this be the type of the diagonal block of A? */
4491   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4492   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4493   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4494   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4495   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4496   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4497   for (i=0; i<m; i++) {
4498     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4499     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4500     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4501   }
4502   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4503   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4504 
4505   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4506   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4507   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4508   sprintf(name,"%s.%d",outfile,rank);
4509   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4510   ierr = PetscFree(name);CHKERRQ(ierr);
4511   ierr = MatView(B,out);CHKERRQ(ierr);
4512   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4513   ierr = MatDestroy(&B);CHKERRQ(ierr);
4514   PetscFunctionReturn(0);
4515 }
4516 
4517 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4518 {
4519   PetscErrorCode      ierr;
4520   Mat_Merge_SeqsToMPI *merge;
4521   PetscContainer      container;
4522 
4523   PetscFunctionBegin;
4524   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4525   if (container) {
4526     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4527     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4528     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4529     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4530     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4531     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4532     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4533     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4534     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4535     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4536     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4537     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4538     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4539     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4540     ierr = PetscFree(merge);CHKERRQ(ierr);
4541     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4542   }
4543   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4544   PetscFunctionReturn(0);
4545 }
4546 
4547 #include <../src/mat/utils/freespace.h>
4548 #include <petscbt.h>
4549 
4550 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4551 {
4552   PetscErrorCode      ierr;
4553   MPI_Comm            comm;
4554   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4555   PetscMPIInt         size,rank,taga,*len_s;
4556   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4557   PetscInt            proc,m;
4558   PetscInt            **buf_ri,**buf_rj;
4559   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4560   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4561   MPI_Request         *s_waits,*r_waits;
4562   MPI_Status          *status;
4563   MatScalar           *aa=a->a;
4564   MatScalar           **abuf_r,*ba_i;
4565   Mat_Merge_SeqsToMPI *merge;
4566   PetscContainer      container;
4567 
4568   PetscFunctionBegin;
4569   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4570   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4571 
4572   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4573   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4574 
4575   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4576   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4577 
4578   bi     = merge->bi;
4579   bj     = merge->bj;
4580   buf_ri = merge->buf_ri;
4581   buf_rj = merge->buf_rj;
4582 
4583   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4584   owners = merge->rowmap->range;
4585   len_s  = merge->len_s;
4586 
4587   /* send and recv matrix values */
4588   /*-----------------------------*/
4589   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4590   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4591 
4592   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4593   for (proc=0,k=0; proc<size; proc++) {
4594     if (!len_s[proc]) continue;
4595     i    = owners[proc];
4596     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4597     k++;
4598   }
4599 
4600   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4601   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4602   ierr = PetscFree(status);CHKERRQ(ierr);
4603 
4604   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4605   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4606 
4607   /* insert mat values of mpimat */
4608   /*----------------------------*/
4609   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4610   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4611 
4612   for (k=0; k<merge->nrecv; k++) {
4613     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4614     nrows       = *(buf_ri_k[k]);
4615     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4616     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4617   }
4618 
4619   /* set values of ba */
4620   m = merge->rowmap->n;
4621   for (i=0; i<m; i++) {
4622     arow = owners[rank] + i;
4623     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4624     bnzi = bi[i+1] - bi[i];
4625     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4626 
4627     /* add local non-zero vals of this proc's seqmat into ba */
4628     anzi   = ai[arow+1] - ai[arow];
4629     aj     = a->j + ai[arow];
4630     aa     = a->a + ai[arow];
4631     nextaj = 0;
4632     for (j=0; nextaj<anzi; j++) {
4633       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4634         ba_i[j] += aa[nextaj++];
4635       }
4636     }
4637 
4638     /* add received vals into ba */
4639     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4640       /* i-th row */
4641       if (i == *nextrow[k]) {
4642         anzi   = *(nextai[k]+1) - *nextai[k];
4643         aj     = buf_rj[k] + *(nextai[k]);
4644         aa     = abuf_r[k] + *(nextai[k]);
4645         nextaj = 0;
4646         for (j=0; nextaj<anzi; j++) {
4647           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4648             ba_i[j] += aa[nextaj++];
4649           }
4650         }
4651         nextrow[k]++; nextai[k]++;
4652       }
4653     }
4654     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4655   }
4656   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4657   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4658 
4659   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4660   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4661   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4662   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4663   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4664   PetscFunctionReturn(0);
4665 }
4666 
4667 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4668 {
4669   PetscErrorCode      ierr;
4670   Mat                 B_mpi;
4671   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4672   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4673   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4674   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4675   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4676   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4677   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4678   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4679   MPI_Status          *status;
4680   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4681   PetscBT             lnkbt;
4682   Mat_Merge_SeqsToMPI *merge;
4683   PetscContainer      container;
4684 
4685   PetscFunctionBegin;
4686   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4687 
4688   /* make sure it is a PETSc comm */
4689   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4690   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4691   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4692 
4693   ierr = PetscNew(&merge);CHKERRQ(ierr);
4694   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4695 
4696   /* determine row ownership */
4697   /*---------------------------------------------------------*/
4698   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4699   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4700   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4701   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4702   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4703   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4704   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4705 
4706   m      = merge->rowmap->n;
4707   owners = merge->rowmap->range;
4708 
4709   /* determine the number of messages to send, their lengths */
4710   /*---------------------------------------------------------*/
4711   len_s = merge->len_s;
4712 
4713   len          = 0; /* length of buf_si[] */
4714   merge->nsend = 0;
4715   for (proc=0; proc<size; proc++) {
4716     len_si[proc] = 0;
4717     if (proc == rank) {
4718       len_s[proc] = 0;
4719     } else {
4720       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4721       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4722     }
4723     if (len_s[proc]) {
4724       merge->nsend++;
4725       nrows = 0;
4726       for (i=owners[proc]; i<owners[proc+1]; i++) {
4727         if (ai[i+1] > ai[i]) nrows++;
4728       }
4729       len_si[proc] = 2*(nrows+1);
4730       len         += len_si[proc];
4731     }
4732   }
4733 
4734   /* determine the number and length of messages to receive for ij-structure */
4735   /*-------------------------------------------------------------------------*/
4736   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4737   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4738 
4739   /* post the Irecv of j-structure */
4740   /*-------------------------------*/
4741   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4742   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4743 
4744   /* post the Isend of j-structure */
4745   /*--------------------------------*/
4746   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4747 
4748   for (proc=0, k=0; proc<size; proc++) {
4749     if (!len_s[proc]) continue;
4750     i    = owners[proc];
4751     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4752     k++;
4753   }
4754 
4755   /* receives and sends of j-structure are complete */
4756   /*------------------------------------------------*/
4757   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4758   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4759 
4760   /* send and recv i-structure */
4761   /*---------------------------*/
4762   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4763   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4764 
4765   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4766   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4767   for (proc=0,k=0; proc<size; proc++) {
4768     if (!len_s[proc]) continue;
4769     /* form outgoing message for i-structure:
4770          buf_si[0]:                 nrows to be sent
4771                [1:nrows]:           row index (global)
4772                [nrows+1:2*nrows+1]: i-structure index
4773     */
4774     /*-------------------------------------------*/
4775     nrows       = len_si[proc]/2 - 1;
4776     buf_si_i    = buf_si + nrows+1;
4777     buf_si[0]   = nrows;
4778     buf_si_i[0] = 0;
4779     nrows       = 0;
4780     for (i=owners[proc]; i<owners[proc+1]; i++) {
4781       anzi = ai[i+1] - ai[i];
4782       if (anzi) {
4783         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4784         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4785         nrows++;
4786       }
4787     }
4788     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4789     k++;
4790     buf_si += len_si[proc];
4791   }
4792 
4793   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4794   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4795 
4796   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4797   for (i=0; i<merge->nrecv; i++) {
4798     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4799   }
4800 
4801   ierr = PetscFree(len_si);CHKERRQ(ierr);
4802   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4803   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4804   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4805   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4806   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4807   ierr = PetscFree(status);CHKERRQ(ierr);
4808 
4809   /* compute a local seq matrix in each processor */
4810   /*----------------------------------------------*/
4811   /* allocate bi array and free space for accumulating nonzero column info */
4812   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4813   bi[0] = 0;
4814 
4815   /* create and initialize a linked list */
4816   nlnk = N+1;
4817   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4818 
4819   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4820   len  = ai[owners[rank+1]] - ai[owners[rank]];
4821   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4822 
4823   current_space = free_space;
4824 
4825   /* determine symbolic info for each local row */
4826   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4827 
4828   for (k=0; k<merge->nrecv; k++) {
4829     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4830     nrows       = *buf_ri_k[k];
4831     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4832     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4833   }
4834 
4835   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4836   len  = 0;
4837   for (i=0; i<m; i++) {
4838     bnzi = 0;
4839     /* add local non-zero cols of this proc's seqmat into lnk */
4840     arow  = owners[rank] + i;
4841     anzi  = ai[arow+1] - ai[arow];
4842     aj    = a->j + ai[arow];
4843     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4844     bnzi += nlnk;
4845     /* add received col data into lnk */
4846     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4847       if (i == *nextrow[k]) { /* i-th row */
4848         anzi  = *(nextai[k]+1) - *nextai[k];
4849         aj    = buf_rj[k] + *nextai[k];
4850         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4851         bnzi += nlnk;
4852         nextrow[k]++; nextai[k]++;
4853       }
4854     }
4855     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4856 
4857     /* if free space is not available, make more free space */
4858     if (current_space->local_remaining<bnzi) {
4859       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4860       nspacedouble++;
4861     }
4862     /* copy data into free space, then initialize lnk */
4863     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4864     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4865 
4866     current_space->array           += bnzi;
4867     current_space->local_used      += bnzi;
4868     current_space->local_remaining -= bnzi;
4869 
4870     bi[i+1] = bi[i] + bnzi;
4871   }
4872 
4873   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4874 
4875   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4876   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4877   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4878 
4879   /* create symbolic parallel matrix B_mpi */
4880   /*---------------------------------------*/
4881   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4882   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4883   if (n==PETSC_DECIDE) {
4884     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4885   } else {
4886     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4887   }
4888   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4889   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4890   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4891   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4892   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4893 
4894   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4895   B_mpi->assembled    = PETSC_FALSE;
4896   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4897   merge->bi           = bi;
4898   merge->bj           = bj;
4899   merge->buf_ri       = buf_ri;
4900   merge->buf_rj       = buf_rj;
4901   merge->coi          = NULL;
4902   merge->coj          = NULL;
4903   merge->owners_co    = NULL;
4904 
4905   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4906 
4907   /* attach the supporting struct to B_mpi for reuse */
4908   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4909   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4910   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4911   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4912   *mpimat = B_mpi;
4913 
4914   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4915   PetscFunctionReturn(0);
4916 }
4917 
4918 /*@C
4919       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4920                  matrices from each processor
4921 
4922     Collective on MPI_Comm
4923 
4924    Input Parameters:
4925 +    comm - the communicators the parallel matrix will live on
4926 .    seqmat - the input sequential matrices
4927 .    m - number of local rows (or PETSC_DECIDE)
4928 .    n - number of local columns (or PETSC_DECIDE)
4929 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4930 
4931    Output Parameter:
4932 .    mpimat - the parallel matrix generated
4933 
4934     Level: advanced
4935 
4936    Notes:
4937      The dimensions of the sequential matrix in each processor MUST be the same.
4938      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4939      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4940 @*/
4941 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4942 {
4943   PetscErrorCode ierr;
4944   PetscMPIInt    size;
4945 
4946   PetscFunctionBegin;
4947   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4948   if (size == 1) {
4949     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4950     if (scall == MAT_INITIAL_MATRIX) {
4951       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4952     } else {
4953       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4954     }
4955     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4956     PetscFunctionReturn(0);
4957   }
4958   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4959   if (scall == MAT_INITIAL_MATRIX) {
4960     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4961   }
4962   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4963   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4964   PetscFunctionReturn(0);
4965 }
4966 
4967 /*@
4968      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4969           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4970           with MatGetSize()
4971 
4972     Not Collective
4973 
4974    Input Parameters:
4975 +    A - the matrix
4976 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4977 
4978    Output Parameter:
4979 .    A_loc - the local sequential matrix generated
4980 
4981     Level: developer
4982 
4983 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
4984 
4985 @*/
4986 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4987 {
4988   PetscErrorCode ierr;
4989   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4990   Mat_SeqAIJ     *mat,*a,*b;
4991   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4992   MatScalar      *aa,*ba,*cam;
4993   PetscScalar    *ca;
4994   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4995   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4996   PetscBool      match;
4997   MPI_Comm       comm;
4998   PetscMPIInt    size;
4999 
5000   PetscFunctionBegin;
5001   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5002   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5003   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5004   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5005   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
5006 
5007   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5008   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5009   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5010   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5011   aa = a->a; ba = b->a;
5012   if (scall == MAT_INITIAL_MATRIX) {
5013     if (size == 1) {
5014       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
5015       PetscFunctionReturn(0);
5016     }
5017 
5018     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5019     ci[0] = 0;
5020     for (i=0; i<am; i++) {
5021       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5022     }
5023     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5024     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5025     k    = 0;
5026     for (i=0; i<am; i++) {
5027       ncols_o = bi[i+1] - bi[i];
5028       ncols_d = ai[i+1] - ai[i];
5029       /* off-diagonal portion of A */
5030       for (jo=0; jo<ncols_o; jo++) {
5031         col = cmap[*bj];
5032         if (col >= cstart) break;
5033         cj[k]   = col; bj++;
5034         ca[k++] = *ba++;
5035       }
5036       /* diagonal portion of A */
5037       for (j=0; j<ncols_d; j++) {
5038         cj[k]   = cstart + *aj++;
5039         ca[k++] = *aa++;
5040       }
5041       /* off-diagonal portion of A */
5042       for (j=jo; j<ncols_o; j++) {
5043         cj[k]   = cmap[*bj++];
5044         ca[k++] = *ba++;
5045       }
5046     }
5047     /* put together the new matrix */
5048     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5049     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5050     /* Since these are PETSc arrays, change flags to free them as necessary. */
5051     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5052     mat->free_a  = PETSC_TRUE;
5053     mat->free_ij = PETSC_TRUE;
5054     mat->nonew   = 0;
5055   } else if (scall == MAT_REUSE_MATRIX) {
5056     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5057     ci = mat->i; cj = mat->j; cam = mat->a;
5058     for (i=0; i<am; i++) {
5059       /* off-diagonal portion of A */
5060       ncols_o = bi[i+1] - bi[i];
5061       for (jo=0; jo<ncols_o; jo++) {
5062         col = cmap[*bj];
5063         if (col >= cstart) break;
5064         *cam++ = *ba++; bj++;
5065       }
5066       /* diagonal portion of A */
5067       ncols_d = ai[i+1] - ai[i];
5068       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5069       /* off-diagonal portion of A */
5070       for (j=jo; j<ncols_o; j++) {
5071         *cam++ = *ba++; bj++;
5072       }
5073     }
5074   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5075   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5076   PetscFunctionReturn(0);
5077 }
5078 
5079 /*@C
5080      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5081 
5082     Not Collective
5083 
5084    Input Parameters:
5085 +    A - the matrix
5086 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5087 -    row, col - index sets of rows and columns to extract (or NULL)
5088 
5089    Output Parameter:
5090 .    A_loc - the local sequential matrix generated
5091 
5092     Level: developer
5093 
5094 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5095 
5096 @*/
5097 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5098 {
5099   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5100   PetscErrorCode ierr;
5101   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5102   IS             isrowa,iscola;
5103   Mat            *aloc;
5104   PetscBool      match;
5105 
5106   PetscFunctionBegin;
5107   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5108   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5109   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5110   if (!row) {
5111     start = A->rmap->rstart; end = A->rmap->rend;
5112     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5113   } else {
5114     isrowa = *row;
5115   }
5116   if (!col) {
5117     start = A->cmap->rstart;
5118     cmap  = a->garray;
5119     nzA   = a->A->cmap->n;
5120     nzB   = a->B->cmap->n;
5121     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5122     ncols = 0;
5123     for (i=0; i<nzB; i++) {
5124       if (cmap[i] < start) idx[ncols++] = cmap[i];
5125       else break;
5126     }
5127     imark = i;
5128     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5129     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5130     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5131   } else {
5132     iscola = *col;
5133   }
5134   if (scall != MAT_INITIAL_MATRIX) {
5135     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5136     aloc[0] = *A_loc;
5137   }
5138   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5139   if (!col) { /* attach global id of condensed columns */
5140     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5141   }
5142   *A_loc = aloc[0];
5143   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5144   if (!row) {
5145     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5146   }
5147   if (!col) {
5148     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5149   }
5150   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5151   PetscFunctionReturn(0);
5152 }
5153 
5154 /*@C
5155     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5156 
5157     Collective on Mat
5158 
5159    Input Parameters:
5160 +    A,B - the matrices in mpiaij format
5161 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5162 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5163 
5164    Output Parameter:
5165 +    rowb, colb - index sets of rows and columns of B to extract
5166 -    B_seq - the sequential matrix generated
5167 
5168     Level: developer
5169 
5170 @*/
5171 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5172 {
5173   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5174   PetscErrorCode ierr;
5175   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5176   IS             isrowb,iscolb;
5177   Mat            *bseq=NULL;
5178 
5179   PetscFunctionBegin;
5180   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5181     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5182   }
5183   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5184 
5185   if (scall == MAT_INITIAL_MATRIX) {
5186     start = A->cmap->rstart;
5187     cmap  = a->garray;
5188     nzA   = a->A->cmap->n;
5189     nzB   = a->B->cmap->n;
5190     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5191     ncols = 0;
5192     for (i=0; i<nzB; i++) {  /* row < local row index */
5193       if (cmap[i] < start) idx[ncols++] = cmap[i];
5194       else break;
5195     }
5196     imark = i;
5197     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5198     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5199     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5200     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5201   } else {
5202     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5203     isrowb  = *rowb; iscolb = *colb;
5204     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5205     bseq[0] = *B_seq;
5206   }
5207   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5208   *B_seq = bseq[0];
5209   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5210   if (!rowb) {
5211     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5212   } else {
5213     *rowb = isrowb;
5214   }
5215   if (!colb) {
5216     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5217   } else {
5218     *colb = iscolb;
5219   }
5220   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5221   PetscFunctionReturn(0);
5222 }
5223 
5224 /*
5225     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5226     of the OFF-DIAGONAL portion of local A
5227 
5228     Collective on Mat
5229 
5230    Input Parameters:
5231 +    A,B - the matrices in mpiaij format
5232 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5233 
5234    Output Parameter:
5235 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5236 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5237 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5238 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5239 
5240     Level: developer
5241 
5242 */
5243 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5244 {
5245   VecScatter_MPI_General *gen_to,*gen_from;
5246   PetscErrorCode         ierr;
5247   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5248   Mat_SeqAIJ             *b_oth;
5249   VecScatter             ctx;
5250   MPI_Comm               comm;
5251   PetscMPIInt            *rprocs,*sprocs,tag,rank;
5252   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5253   PetscInt               *rvalues,*svalues,*cols,sbs,rbs;
5254   PetscScalar              *b_otha,*bufa,*bufA,*vals;
5255   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5256   MPI_Request            *rwaits = NULL,*swaits = NULL;
5257   MPI_Status             *sstatus,rstatus;
5258   PetscMPIInt            jj,size;
5259   VecScatterType         type;
5260   PetscBool              mpi1;
5261 
5262   PetscFunctionBegin;
5263   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5264   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5265 
5266   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5267     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5268   }
5269   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5270   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5271 
5272   if (size == 1) {
5273     startsj_s = NULL;
5274     bufa_ptr  = NULL;
5275     *B_oth    = NULL;
5276     PetscFunctionReturn(0);
5277   }
5278 
5279   ctx = a->Mvctx;
5280   ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr);
5281   ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr);
5282   if (!mpi1) {
5283     /* a->Mvctx is not type MPI1 which is not implemented for Mat-Mat ops,
5284      thus create a->Mvctx_mpi1 */
5285     if (!a->Mvctx_mpi1) {
5286       a->Mvctx_mpi1_flg = PETSC_TRUE;
5287       ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr);
5288     }
5289     ctx = a->Mvctx_mpi1;
5290   }
5291   tag = ((PetscObject)ctx)->tag;
5292 
5293   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5294   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5295   nrecvs   = gen_from->n;
5296   nsends   = gen_to->n;
5297 
5298   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5299   srow    = gen_to->indices;    /* local row index to be sent */
5300   sstarts = gen_to->starts;
5301   sprocs  = gen_to->procs;
5302   sstatus = gen_to->sstatus;
5303   sbs     = gen_to->bs;
5304   rstarts = gen_from->starts;
5305   rprocs  = gen_from->procs;
5306   rbs     = gen_from->bs;
5307 
5308   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5309   if (scall == MAT_INITIAL_MATRIX) {
5310     /* i-array */
5311     /*---------*/
5312     /*  post receives */
5313     ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
5314     for (i=0; i<nrecvs; i++) {
5315       rowlen = rvalues + rstarts[i]*rbs;
5316       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5317       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5318     }
5319 
5320     /* pack the outgoing message */
5321     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5322 
5323     sstartsj[0] = 0;
5324     rstartsj[0] = 0;
5325     len         = 0; /* total length of j or a array to be sent */
5326     k           = 0;
5327     ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
5328     for (i=0; i<nsends; i++) {
5329       rowlen = svalues + sstarts[i]*sbs;
5330       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5331       for (j=0; j<nrows; j++) {
5332         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5333         for (l=0; l<sbs; l++) {
5334           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5335 
5336           rowlen[j*sbs+l] = ncols;
5337 
5338           len += ncols;
5339           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5340         }
5341         k++;
5342       }
5343       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5344 
5345       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5346     }
5347     /* recvs and sends of i-array are completed */
5348     i = nrecvs;
5349     while (i--) {
5350       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5351     }
5352     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5353     ierr = PetscFree(svalues);CHKERRQ(ierr);
5354 
5355     /* allocate buffers for sending j and a arrays */
5356     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5357     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5358 
5359     /* create i-array of B_oth */
5360     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5361 
5362     b_othi[0] = 0;
5363     len       = 0; /* total length of j or a array to be received */
5364     k         = 0;
5365     for (i=0; i<nrecvs; i++) {
5366       rowlen = rvalues + rstarts[i]*rbs;
5367       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5368       for (j=0; j<nrows; j++) {
5369         b_othi[k+1] = b_othi[k] + rowlen[j];
5370         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5371         k++;
5372       }
5373       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5374     }
5375     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5376 
5377     /* allocate space for j and a arrrays of B_oth */
5378     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5379     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5380 
5381     /* j-array */
5382     /*---------*/
5383     /*  post receives of j-array */
5384     for (i=0; i<nrecvs; i++) {
5385       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5386       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5387     }
5388 
5389     /* pack the outgoing message j-array */
5390     k = 0;
5391     for (i=0; i<nsends; i++) {
5392       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5393       bufJ  = bufj+sstartsj[i];
5394       for (j=0; j<nrows; j++) {
5395         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5396         for (ll=0; ll<sbs; ll++) {
5397           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5398           for (l=0; l<ncols; l++) {
5399             *bufJ++ = cols[l];
5400           }
5401           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5402         }
5403       }
5404       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5405     }
5406 
5407     /* recvs and sends of j-array are completed */
5408     i = nrecvs;
5409     while (i--) {
5410       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5411     }
5412     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5413   } else if (scall == MAT_REUSE_MATRIX) {
5414     sstartsj = *startsj_s;
5415     rstartsj = *startsj_r;
5416     bufa     = *bufa_ptr;
5417     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5418     b_otha   = b_oth->a;
5419   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5420 
5421   /* a-array */
5422   /*---------*/
5423   /*  post receives of a-array */
5424   for (i=0; i<nrecvs; i++) {
5425     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5426     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5427   }
5428 
5429   /* pack the outgoing message a-array */
5430   k = 0;
5431   for (i=0; i<nsends; i++) {
5432     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5433     bufA  = bufa+sstartsj[i];
5434     for (j=0; j<nrows; j++) {
5435       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5436       for (ll=0; ll<sbs; ll++) {
5437         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5438         for (l=0; l<ncols; l++) {
5439           *bufA++ = vals[l];
5440         }
5441         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5442       }
5443     }
5444     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5445   }
5446   /* recvs and sends of a-array are completed */
5447   i = nrecvs;
5448   while (i--) {
5449     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5450   }
5451   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5452   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5453 
5454   if (scall == MAT_INITIAL_MATRIX) {
5455     /* put together the new matrix */
5456     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5457 
5458     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5459     /* Since these are PETSc arrays, change flags to free them as necessary. */
5460     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5461     b_oth->free_a  = PETSC_TRUE;
5462     b_oth->free_ij = PETSC_TRUE;
5463     b_oth->nonew   = 0;
5464 
5465     ierr = PetscFree(bufj);CHKERRQ(ierr);
5466     if (!startsj_s || !bufa_ptr) {
5467       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5468       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5469     } else {
5470       *startsj_s = sstartsj;
5471       *startsj_r = rstartsj;
5472       *bufa_ptr  = bufa;
5473     }
5474   }
5475   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5476   PetscFunctionReturn(0);
5477 }
5478 
5479 /*@C
5480   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5481 
5482   Not Collective
5483 
5484   Input Parameters:
5485 . A - The matrix in mpiaij format
5486 
5487   Output Parameter:
5488 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5489 . colmap - A map from global column index to local index into lvec
5490 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5491 
5492   Level: developer
5493 
5494 @*/
5495 #if defined(PETSC_USE_CTABLE)
5496 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5497 #else
5498 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5499 #endif
5500 {
5501   Mat_MPIAIJ *a;
5502 
5503   PetscFunctionBegin;
5504   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5505   PetscValidPointer(lvec, 2);
5506   PetscValidPointer(colmap, 3);
5507   PetscValidPointer(multScatter, 4);
5508   a = (Mat_MPIAIJ*) A->data;
5509   if (lvec) *lvec = a->lvec;
5510   if (colmap) *colmap = a->colmap;
5511   if (multScatter) *multScatter = a->Mvctx;
5512   PetscFunctionReturn(0);
5513 }
5514 
5515 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5516 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5517 #if defined(PETSC_HAVE_MKL_SPARSE)
5518 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5519 #endif
5520 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5521 #if defined(PETSC_HAVE_ELEMENTAL)
5522 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5523 #endif
5524 #if defined(PETSC_HAVE_HYPRE)
5525 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5526 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5527 #endif
5528 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5529 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5530 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5531 
5532 /*
5533     Computes (B'*A')' since computing B*A directly is untenable
5534 
5535                n                       p                          p
5536         (              )       (              )         (                  )
5537       m (      A       )  *  n (       B      )   =   m (         C        )
5538         (              )       (              )         (                  )
5539 
5540 */
5541 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5542 {
5543   PetscErrorCode ierr;
5544   Mat            At,Bt,Ct;
5545 
5546   PetscFunctionBegin;
5547   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5548   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5549   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5550   ierr = MatDestroy(&At);CHKERRQ(ierr);
5551   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5552   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5553   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5554   PetscFunctionReturn(0);
5555 }
5556 
5557 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5558 {
5559   PetscErrorCode ierr;
5560   PetscInt       m=A->rmap->n,n=B->cmap->n;
5561   Mat            Cmat;
5562 
5563   PetscFunctionBegin;
5564   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5565   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5566   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5567   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5568   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5569   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5570   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5571   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5572 
5573   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5574 
5575   *C = Cmat;
5576   PetscFunctionReturn(0);
5577 }
5578 
5579 /* ----------------------------------------------------------------*/
5580 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5581 {
5582   PetscErrorCode ierr;
5583 
5584   PetscFunctionBegin;
5585   if (scall == MAT_INITIAL_MATRIX) {
5586     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5587     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5588     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5589   }
5590   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5591   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5592   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5593   PetscFunctionReturn(0);
5594 }
5595 
5596 /*MC
5597    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5598 
5599    Options Database Keys:
5600 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5601 
5602   Level: beginner
5603 
5604 .seealso: MatCreateAIJ()
5605 M*/
5606 
5607 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5608 {
5609   Mat_MPIAIJ     *b;
5610   PetscErrorCode ierr;
5611   PetscMPIInt    size;
5612 
5613   PetscFunctionBegin;
5614   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5615 
5616   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5617   B->data       = (void*)b;
5618   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5619   B->assembled  = PETSC_FALSE;
5620   B->insertmode = NOT_SET_VALUES;
5621   b->size       = size;
5622 
5623   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5624 
5625   /* build cache for off array entries formed */
5626   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5627 
5628   b->donotstash  = PETSC_FALSE;
5629   b->colmap      = 0;
5630   b->garray      = 0;
5631   b->roworiented = PETSC_TRUE;
5632 
5633   /* stuff used for matrix vector multiply */
5634   b->lvec  = NULL;
5635   b->Mvctx = NULL;
5636 
5637   /* stuff for MatGetRow() */
5638   b->rowindices   = 0;
5639   b->rowvalues    = 0;
5640   b->getrowactive = PETSC_FALSE;
5641 
5642   /* flexible pointer used in CUSP/CUSPARSE classes */
5643   b->spptr = NULL;
5644 
5645   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5646   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5647   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5648   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5649   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5650   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5651   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5652   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5653   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5654 #if defined(PETSC_HAVE_MKL_SPARSE)
5655   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5656 #endif
5657   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5658   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5659 #if defined(PETSC_HAVE_ELEMENTAL)
5660   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5661 #endif
5662 #if defined(PETSC_HAVE_HYPRE)
5663   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5664 #endif
5665   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
5666   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5667   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5668   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5669   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5670 #if defined(PETSC_HAVE_HYPRE)
5671   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5672 #endif
5673   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
5674   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5675   PetscFunctionReturn(0);
5676 }
5677 
5678 /*@C
5679      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5680          and "off-diagonal" part of the matrix in CSR format.
5681 
5682    Collective on MPI_Comm
5683 
5684    Input Parameters:
5685 +  comm - MPI communicator
5686 .  m - number of local rows (Cannot be PETSC_DECIDE)
5687 .  n - This value should be the same as the local size used in creating the
5688        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5689        calculated if N is given) For square matrices n is almost always m.
5690 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5691 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5692 .   i - row indices for "diagonal" portion of matrix
5693 .   j - column indices
5694 .   a - matrix values
5695 .   oi - row indices for "off-diagonal" portion of matrix
5696 .   oj - column indices
5697 -   oa - matrix values
5698 
5699    Output Parameter:
5700 .   mat - the matrix
5701 
5702    Level: advanced
5703 
5704    Notes:
5705        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5706        must free the arrays once the matrix has been destroyed and not before.
5707 
5708        The i and j indices are 0 based
5709 
5710        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5711 
5712        This sets local rows and cannot be used to set off-processor values.
5713 
5714        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5715        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5716        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5717        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5718        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5719        communication if it is known that only local entries will be set.
5720 
5721 .keywords: matrix, aij, compressed row, sparse, parallel
5722 
5723 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5724           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5725 @*/
5726 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5727 {
5728   PetscErrorCode ierr;
5729   Mat_MPIAIJ     *maij;
5730 
5731   PetscFunctionBegin;
5732   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5733   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5734   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5735   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5736   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5737   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5738   maij = (Mat_MPIAIJ*) (*mat)->data;
5739 
5740   (*mat)->preallocated = PETSC_TRUE;
5741 
5742   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5743   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5744 
5745   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5746   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5747 
5748   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5749   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5750   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5751   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5752 
5753   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5754   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5755   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5756   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5757   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5758   PetscFunctionReturn(0);
5759 }
5760 
5761 /*
5762     Special version for direct calls from Fortran
5763 */
5764 #include <petsc/private/fortranimpl.h>
5765 
5766 /* Change these macros so can be used in void function */
5767 #undef CHKERRQ
5768 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5769 #undef SETERRQ2
5770 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5771 #undef SETERRQ3
5772 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5773 #undef SETERRQ
5774 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5775 
5776 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5777 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5778 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5779 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5780 #else
5781 #endif
5782 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5783 {
5784   Mat            mat  = *mmat;
5785   PetscInt       m    = *mm, n = *mn;
5786   InsertMode     addv = *maddv;
5787   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5788   PetscScalar    value;
5789   PetscErrorCode ierr;
5790 
5791   MatCheckPreallocated(mat,1);
5792   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5793 
5794 #if defined(PETSC_USE_DEBUG)
5795   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5796 #endif
5797   {
5798     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5799     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5800     PetscBool roworiented = aij->roworiented;
5801 
5802     /* Some Variables required in the macro */
5803     Mat        A                 = aij->A;
5804     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5805     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5806     MatScalar  *aa               = a->a;
5807     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5808     Mat        B                 = aij->B;
5809     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5810     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5811     MatScalar  *ba               = b->a;
5812 
5813     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5814     PetscInt  nonew = a->nonew;
5815     MatScalar *ap1,*ap2;
5816 
5817     PetscFunctionBegin;
5818     for (i=0; i<m; i++) {
5819       if (im[i] < 0) continue;
5820 #if defined(PETSC_USE_DEBUG)
5821       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5822 #endif
5823       if (im[i] >= rstart && im[i] < rend) {
5824         row      = im[i] - rstart;
5825         lastcol1 = -1;
5826         rp1      = aj + ai[row];
5827         ap1      = aa + ai[row];
5828         rmax1    = aimax[row];
5829         nrow1    = ailen[row];
5830         low1     = 0;
5831         high1    = nrow1;
5832         lastcol2 = -1;
5833         rp2      = bj + bi[row];
5834         ap2      = ba + bi[row];
5835         rmax2    = bimax[row];
5836         nrow2    = bilen[row];
5837         low2     = 0;
5838         high2    = nrow2;
5839 
5840         for (j=0; j<n; j++) {
5841           if (roworiented) value = v[i*n+j];
5842           else value = v[i+j*m];
5843           if (in[j] >= cstart && in[j] < cend) {
5844             col = in[j] - cstart;
5845             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5846             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5847           } else if (in[j] < 0) continue;
5848 #if defined(PETSC_USE_DEBUG)
5849           /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
5850           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5851 #endif
5852           else {
5853             if (mat->was_assembled) {
5854               if (!aij->colmap) {
5855                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5856               }
5857 #if defined(PETSC_USE_CTABLE)
5858               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5859               col--;
5860 #else
5861               col = aij->colmap[in[j]] - 1;
5862 #endif
5863               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5864               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5865                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5866                 col  =  in[j];
5867                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5868                 B     = aij->B;
5869                 b     = (Mat_SeqAIJ*)B->data;
5870                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5871                 rp2   = bj + bi[row];
5872                 ap2   = ba + bi[row];
5873                 rmax2 = bimax[row];
5874                 nrow2 = bilen[row];
5875                 low2  = 0;
5876                 high2 = nrow2;
5877                 bm    = aij->B->rmap->n;
5878                 ba    = b->a;
5879               }
5880             } else col = in[j];
5881             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5882           }
5883         }
5884       } else if (!aij->donotstash) {
5885         if (roworiented) {
5886           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5887         } else {
5888           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5889         }
5890       }
5891     }
5892   }
5893   PetscFunctionReturnVoid();
5894 }
5895