/* petsc: src/mat/impls/aij/mpi/mpiaij.c (revision a5c21ed897b8f3baa7253d9efd1dabe7128e7fbd) */

#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, and MATAIJCRL;
   the type also automatically switches over to using inodes when enough of them exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/
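
/*
   Usage sketch (illustrative only, not part of this file's API): create an AIJ matrix and,
   as the manual page above recommends, call both preallocation routines so the same code
   works whether the communicator has one process or many. The local sizes m, n and the
   per-row estimates (5 diagonal, 2 off-diagonal nonzeros) are assumptions for illustration;
   error checking with CHKERRQ() is elided.

     Mat      A;
     PetscInt m = 8, n = 8;

     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
     MatSetType(A,MATAIJ);
     MatSeqAIJSetPreallocation(A,5,NULL);          // used when the communicator has one process
     MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);   // used when it has several
     // ... insert entries with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ...
*/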

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

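/*
   Usage sketch (hedged, illustrative only): instead of hard-coding the type, it can be chosen
   at runtime through the options database, which is what the -mat_type keys above refer to.

     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
     MatSetFromOptions(A);     // honors e.g. -mat_type aij or -mat_type aijcrl

   run with:  ./app -mat_type aijcrl
*/
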
PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)Y->data;

  PetscFunctionBegin;
  if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*)aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]),work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  IS             sis,gis;
  PetscErrorCode ierr;
  const PetscInt *isis,*igis;
  PetscInt       n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=NULL;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

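/*
   Usage sketch for MatDistribute_MPIAIJ() (hedged; the routine is PETSC_EXTERN for use by
   preconditioners, so this mirrors how such a caller might invoke it). gmat is assumed to
   be a square MATSEQAIJ matrix significant only on rank 0, and m the number of local rows
   wanted on this process.

     Mat dist;
     MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_INITIAL_MATRIX,&dist);
     // later, to refresh only the numerical values while keeping the nonzero pattern:
     MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_REUSE_MATRIX,&dist);
*/
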
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each process
  has an order-N integer array) but is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

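/*
   Lookup sketch (hedged; it mirrors how MatSetValues_MPIAIJ() below consumes the colmap):
   the map stores local_index+1 for each global column present in B, so a result of 0
   (equivalently, lcol < 0 after the decrement) means "column not present in B".

   #if defined(PETSC_USE_CTABLE)
     PetscTableFind(aij->colmap,gcol+1,&lcol);   // keys and values are shifted by one
     lcol--;                                     // lcol < 0  =>  gcol not in B
   #else
     lcol = aij->colmap[gcol] - 1;               // lcol < 0  =>  gcol not in B
   #endif
*/
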
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
  { \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value;   \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    }  \
    if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col;  \
    ap1[_i] = value;  \
    A->nonzerostate++; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
  }

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col;                                       \
    while (high2-low2 > 5) {                              \
      t = (low2+high2)/2;                                 \
      if (rp2[t] > col) high2 = t;                        \
      else             low2  = t;                         \
    }                                                     \
    for (_i=low2; _i<high2; _i++) {                       \
      if (rp2[_i] > col) break;                           \
      if (rp2[_i] == col) {                               \
        if (addv == ADD_VALUES) ap2[_i] += value;         \
        else                    ap2[_i] = value;          \
        goto b_noinsert;                                  \
      }                                                   \
    }                                                     \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;                    \
    /* shift up all the later entries in this row */      \
    for (ii=N; ii>=_i; ii--) {                            \
      rp2[ii+1] = rp2[ii];                                \
      ap2[ii+1] = ap2[ii];                                \
    }                                                     \
    rp2[_i] = col;                                        \
    ap2[_i] = value;                                      \
    B->nonzerostate++;                                    \
    b_noinsert: ;                                         \
    bilen[row] = nrow2;                                   \
  }

PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A; v[] must hold the row's values in global column
     order: off-diagonal entries left of the diagonal block, then the diagonal block, then
     off-diagonal entries to its right */

  /* find size of the part of the row to the left of the diagonal block */
  ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some variables required in the macros */
  Mat        A                 = aij->A;
  Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa               = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B                 = aij->B;
  Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba               = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative rows are ignored; could instead SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative columns are ignored; could instead SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* identify the consecutive values belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any process has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no process disassembled, so we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = NULL;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  PetscInt       *lrows;
  PetscInt       r, len;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  if (A->congruentlayouts == -1) { /* first time we compare row and column layouts */
    PetscBool cong;
    ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
    if (cong) A->congruentlayouts = 1;
    else      A->congruentlayouts = 0;
  }
  if ((diag != 0.0) && A->congruentlayouts) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

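/*
   Usage sketch (hedged, illustrative only): zeroing rows of a parallel AIJ matrix through the
   public MatZeroRows() interface, which dispatches to the routine above. rows[] holds global
   row indices (assumed values, for illustration); any process may list any row. Passing x and
   b fixes the right-hand side so the zeroed rows' unknowns are pinned to the values in x.

     PetscInt rows[] = {0, 5};
     MatZeroRows(A,2,rows,1.0,x,b);   // keeps a unit diagonal and sets b[row] = x[row]
*/
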
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj,*ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off-diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off-diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);

  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* the values were actually received in the Begin() but we still need to call this no-op End() */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

1143 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1144 {
1145   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1146   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1147   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1148   PetscErrorCode ierr;
1149   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1150   int            fd;
1151   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1152   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1153   PetscScalar    *column_values;
1154   PetscInt       message_count,flowcontrolcount;
1155   FILE           *file;
1156 
1157   PetscFunctionBegin;
1158   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1159   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1160   nz   = A->nz + B->nz;
1161   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1162   if (!rank) {
1163     header[0] = MAT_FILE_CLASSID;
1164     header[1] = mat->rmap->N;
1165     header[2] = mat->cmap->N;
1166 
1167     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1168     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1169     /* get largest number of rows any processor has */
1170     rlen  = mat->rmap->n;
1171     range = mat->rmap->range;
1172     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1173   } else {
1174     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1175     rlen = mat->rmap->n;
1176   }
1177 
1178   /* load up the local row counts */
1179   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1180   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1181 
1182   /* store the row lengths to the file */
1183   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1184   if (!rank) {
1185     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1186     for (i=1; i<size; i++) {
1187       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1188       rlen = range[i+1] - range[i];
1189       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1190       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1191     }
1192     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1193   } else {
1194     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1195     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1196     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1197   }
1198   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1199 
1200   /* load up the local column indices */
1201   nzmax = nz; /* th processor needs space a largest processor needs */
1202   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1203   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1204   cnt   = 0;
1205   for (i=0; i<mat->rmap->n; i++) {
1206     for (j=B->i[i]; j<B->i[i+1]; j++) {
1207       if ((col = garray[B->j[j]]) > cstart) break;
1208       column_indices[cnt++] = col;
1209     }
1210     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1211     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1212   }
1213   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1214 
1215   /* store the column indices to the file */
1216   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1217   if (!rank) {
1218     MPI_Status status;
1219     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1220     for (i=1; i<size; i++) {
1221       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1222       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1223       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1224       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1225       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1226     }
1227     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1228   } else {
1229     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1230     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1231     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1232     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1233   }
1234   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1235 
1236   /* load up the local column values */
1237   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1238   cnt  = 0;
1239   for (i=0; i<mat->rmap->n; i++) {
1240     for (j=B->i[i]; j<B->i[i+1]; j++) {
1241       if (garray[B->j[j]] > cstart) break;
1242       column_values[cnt++] = B->a[j];
1243     }
1244     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1245     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1246   }
1247   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1248 
1249   /* store the column values to the file */
1250   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1251   if (!rank) {
1252     MPI_Status status;
1253     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1254     for (i=1; i<size; i++) {
1255       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1256       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1257       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1258       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1259       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1260     }
1261     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1262   } else {
1263     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1264     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1265     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1266     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1267   }
1268   ierr = PetscFree(column_values);CHKERRQ(ierr);
1269 
1270   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1271   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1272   PetscFunctionReturn(0);
1273 }
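/*
  A minimal usage sketch (editor's addition, not part of PETSc): writing an assembled
  MPIAIJ matrix through the flow-controlled binary path above. The matrix A and the
  file name "matrix.dat" are illustrative assumptions.

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);    header, row lengths, column indices, then values
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/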
1274 
1275 #include <petscdraw.h>
1276 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1277 {
1278   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1279   PetscErrorCode    ierr;
1280   PetscMPIInt       rank = aij->rank,size = aij->size;
1281   PetscBool         isdraw,iascii,isbinary;
1282   PetscViewer       sviewer;
1283   PetscViewerFormat format;
1284 
1285   PetscFunctionBegin;
1286   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1287   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1288   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1289   if (iascii) {
1290     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1291     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1292       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1293       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1294       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1295       for (i=0; i<(PetscInt)size; i++) {
1296         nmax = PetscMax(nmax,nz[i]);
1297         nmin = PetscMin(nmin,nz[i]);
1298         navg += nz[i];
1299       }
1300       ierr = PetscFree(nz);CHKERRQ(ierr);
1301       navg = navg/size;
1302       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1303       PetscFunctionReturn(0);
1304     }
1305     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1306     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1307       MatInfo   info;
1308       PetscBool inodes;
1309 
1310       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1311       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1312       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1313       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1314       if (!inodes) {
1315         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1316                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1317       } else {
1318         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1319                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1320       }
1321       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1322       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1323       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1324       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1325       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1326       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1327       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1328       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1329       PetscFunctionReturn(0);
1330     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1331       PetscInt inodecount,inodelimit,*inodes;
1332       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1333       if (inodes) {
1334         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1335       } else {
1336         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1337       }
1338       PetscFunctionReturn(0);
1339     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1340       PetscFunctionReturn(0);
1341     }
1342   } else if (isbinary) {
1343     if (size == 1) {
1344       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1345       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1346     } else {
1347       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1348     }
1349     PetscFunctionReturn(0);
1350   } else if (isdraw) {
1351     PetscDraw draw;
1352     PetscBool isnull;
1353     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1354     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1355     if (isnull) PetscFunctionReturn(0);
1356   }
1357 
1358   {
1359     /* assemble the entire matrix onto first processor. */
1360     Mat        A;
1361     Mat_SeqAIJ *Aloc;
1362     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1363     MatScalar  *a;
1364 
1365     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1366     if (!rank) {
1367       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1368     } else {
1369       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1370     }
1371     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1372     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1373     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1374     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1375     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1376 
1377     /* copy over the A part */
1378     Aloc = (Mat_SeqAIJ*)aij->A->data;
1379     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1380     row  = mat->rmap->rstart;
1381     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1382     for (i=0; i<m; i++) {
1383       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1384       row++;
1385       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1386     }
1387     aj = Aloc->j;
1388     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1389 
1390     /* copy over the B part */
1391     Aloc = (Mat_SeqAIJ*)aij->B->data;
1392     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1393     row  = mat->rmap->rstart;
1394     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1395     ct   = cols;
1396     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1397     for (i=0; i<m; i++) {
1398       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1399       row++;
1400       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1401     }
1402     ierr = PetscFree(ct);CHKERRQ(ierr);
1403     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1404     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1405     /*
1406        Everyone has to participate in the MatView call that draws the matrix, since the
1407        graphics waits are synchronized across all processes that share the PetscDraw object
1408     */
1409     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1410     if (!rank) {
1411       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1412       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1413     }
1414     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1415     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1416     ierr = MatDestroy(&A);CHKERRQ(ierr);
1417   }
1418   PetscFunctionReturn(0);
1419 }
1420 
1421 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1422 {
1423   PetscErrorCode ierr;
1424   PetscBool      iascii,isdraw,issocket,isbinary;
1425 
1426   PetscFunctionBegin;
1427   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1428   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1429   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1430   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1431   if (iascii || isdraw || isbinary || issocket) {
1432     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1433   }
1434   PetscFunctionReturn(0);
1435 }
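/*
  A minimal usage sketch (editor's addition, not part of PETSc): requesting the
  detailed ASCII view handled above; A is an illustrative assumption.

     ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
     ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/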
1436 
1437 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1438 {
1439   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1440   PetscErrorCode ierr;
1441   Vec            bb1 = 0;
1442   PetscBool      hasop;
1443 
1444   PetscFunctionBegin;
1445   if (flag == SOR_APPLY_UPPER) {
1446     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1447     PetscFunctionReturn(0);
1448   }
1449 
1450   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1451     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1452   }
1453 
1454   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1455     if (flag & SOR_ZERO_INITIAL_GUESS) {
1456       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1457       its--;
1458     }
1459 
1460     while (its--) {
1461       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1462       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1463 
1464       /* update rhs: bb1 = bb - B*x */
1465       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1466       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1467 
1468       /* local sweep */
1469       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1470     }
1471   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1472     if (flag & SOR_ZERO_INITIAL_GUESS) {
1473       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1474       its--;
1475     }
1476     while (its--) {
1477       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1478       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1479 
1480       /* update rhs: bb1 = bb - B*x */
1481       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1482       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1483 
1484       /* local sweep */
1485       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1486     }
1487   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1488     if (flag & SOR_ZERO_INITIAL_GUESS) {
1489       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1490       its--;
1491     }
1492     while (its--) {
1493       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1494       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1495 
1496       /* update rhs: bb1 = bb - B*x */
1497       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1498       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1499 
1500       /* local sweep */
1501       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1502     }
1503   } else if (flag & SOR_EISENSTAT) {
1504     Vec xx1;
1505 
1506     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1507     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1508 
1509     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1510     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1511     if (!mat->diag) {
1512       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1513       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1514     }
1515     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1516     if (hasop) {
1517       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1518     } else {
1519       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1520     }
1521     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1522 
1523     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1524 
1525     /* local sweep */
1526     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1527     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1528     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1529   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1530 
1531   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1532 
1533   matin->factorerrortype = mat->A->factorerrortype;
1534   PetscFunctionReturn(0);
1535 }
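/*
  A minimal usage sketch (editor's addition, not part of PETSc): one processor-local
  symmetric SOR sweep through the routine above; only the SOR_LOCAL_* (block Jacobi
  style) and SOR_EISENSTAT variants are supported in parallel. A, b, x are
  illustrative assumptions.

     ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);
*/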
1536 
1537 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1538 {
1539   Mat            aA,aB,Aperm;
1540   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1541   PetscScalar    *aa,*ba;
1542   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1543   PetscSF        rowsf,sf;
1544   IS             parcolp = NULL;
1545   PetscBool      done;
1546   PetscErrorCode ierr;
1547 
1548   PetscFunctionBegin;
1549   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1550   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1551   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1552   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1553 
1554   /* Invert row permutation to find out where my rows should go */
1555   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1556   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1557   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1558   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1559   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1560   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1561 
1562   /* Invert column permutation to find out where my columns should go */
1563   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1564   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1565   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1566   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1567   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1568   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1569   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1570 
1571   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1572   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1573   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1574 
1575   /* Find out where my gcols should go */
1576   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1577   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1578   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1579   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1580   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1581   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1582   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1583   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1584 
1585   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1586   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1587   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1588   for (i=0; i<m; i++) {
1589     PetscInt row = rdest[i],rowner;
1590     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1591     for (j=ai[i]; j<ai[i+1]; j++) {
1592       PetscInt cowner,col = cdest[aj[j]];
1593       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1594       if (rowner == cowner) dnnz[i]++;
1595       else onnz[i]++;
1596     }
1597     for (j=bi[i]; j<bi[i+1]; j++) {
1598       PetscInt cowner,col = gcdest[bj[j]];
1599       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1600       if (rowner == cowner) dnnz[i]++;
1601       else onnz[i]++;
1602     }
1603   }
1604   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1605   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1606   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1607   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1608   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1609 
1610   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1611   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1612   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1613   for (i=0; i<m; i++) {
1614     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1615     PetscInt j0,rowlen;
1616     rowlen = ai[i+1] - ai[i];
1617     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than the work array length m, so insert in batches */
1618       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1619       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1620     }
1621     rowlen = bi[i+1] - bi[i];
1622     for (j0=j=0; j<rowlen; j0=j) {
1623       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1624       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1625     }
1626   }
1627   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1628   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1629   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1630   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1631   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1632   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1633   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1634   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1635   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1636   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1637   *B = Aperm;
1638   PetscFunctionReturn(0);
1639 }
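/*
  A minimal usage sketch (editor's addition, not part of PETSc): permuting rows and
  columns with the routine above; rowperm and colperm are parallel index sets whose
  local parts give the target global positions of this process's rows and columns
  (illustrative assumptions here).

     Mat Aperm;
     ierr = MatPermute(A,rowperm,colperm,&Aperm);CHKERRQ(ierr);
     ierr = MatDestroy(&Aperm);CHKERRQ(ierr);
*/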
1640 
1641 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1642 {
1643   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1644   PetscErrorCode ierr;
1645 
1646   PetscFunctionBegin;
1647   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1648   if (ghosts) *ghosts = aij->garray;
1649   PetscFunctionReturn(0);
1650 }
1651 
1652 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1653 {
1654   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1655   Mat            A    = mat->A,B = mat->B;
1656   PetscErrorCode ierr;
1657   PetscReal      isend[5],irecv[5];
1658 
1659   PetscFunctionBegin;
1660   info->block_size = 1.0;
1661   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1662 
1663   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1664   isend[3] = info->memory;  isend[4] = info->mallocs;
1665 
1666   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1667 
1668   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1669   isend[3] += info->memory;  isend[4] += info->mallocs;
1670   if (flag == MAT_LOCAL) {
1671     info->nz_used      = isend[0];
1672     info->nz_allocated = isend[1];
1673     info->nz_unneeded  = isend[2];
1674     info->memory       = isend[3];
1675     info->mallocs      = isend[4];
1676   } else if (flag == MAT_GLOBAL_MAX) {
1677     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1678 
1679     info->nz_used      = irecv[0];
1680     info->nz_allocated = irecv[1];
1681     info->nz_unneeded  = irecv[2];
1682     info->memory       = irecv[3];
1683     info->mallocs      = irecv[4];
1684   } else if (flag == MAT_GLOBAL_SUM) {
1685     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1686 
1687     info->nz_used      = irecv[0];
1688     info->nz_allocated = irecv[1];
1689     info->nz_unneeded  = irecv[2];
1690     info->memory       = irecv[3];
1691     info->mallocs      = irecv[4];
1692   }
1693   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1694   info->fill_ratio_needed = 0;
1695   info->factor_mallocs    = 0;
1696   PetscFunctionReturn(0);
1697 }
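/*
  A minimal usage sketch (editor's addition, not part of PETSc): summing the
  diagonal and off-diagonal block statistics over all processes via the routine
  above; A is an illustrative assumption.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, nz allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/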
1698 
1699 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1700 {
1701   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1702   PetscErrorCode ierr;
1703 
1704   PetscFunctionBegin;
1705   switch (op) {
1706   case MAT_NEW_NONZERO_LOCATIONS:
1707   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1708   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1709   case MAT_KEEP_NONZERO_PATTERN:
1710   case MAT_NEW_NONZERO_LOCATION_ERR:
1711   case MAT_USE_INODES:
1712   case MAT_IGNORE_ZERO_ENTRIES:
1713     MatCheckPreallocated(A,1);
1714     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1715     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1716     break;
1717   case MAT_ROW_ORIENTED:
1718     MatCheckPreallocated(A,1);
1719     a->roworiented = flg;
1720 
1721     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1722     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1723     break;
1724   case MAT_NEW_DIAGONALS:
1725     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1726     break;
1727   case MAT_IGNORE_OFF_PROC_ENTRIES:
1728     a->donotstash = flg;
1729     break;
1730   case MAT_SPD:
1731     A->spd_set = PETSC_TRUE;
1732     A->spd     = flg;
1733     if (flg) {
1734       A->symmetric                  = PETSC_TRUE;
1735       A->structurally_symmetric     = PETSC_TRUE;
1736       A->symmetric_set              = PETSC_TRUE;
1737       A->structurally_symmetric_set = PETSC_TRUE;
1738     }
1739     break;
1740   case MAT_SYMMETRIC:
1741     MatCheckPreallocated(A,1);
1742     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1743     break;
1744   case MAT_STRUCTURALLY_SYMMETRIC:
1745     MatCheckPreallocated(A,1);
1746     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1747     break;
1748   case MAT_HERMITIAN:
1749     MatCheckPreallocated(A,1);
1750     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1751     break;
1752   case MAT_SYMMETRY_ETERNAL:
1753     MatCheckPreallocated(A,1);
1754     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1755     break;
1756   case MAT_SUBMAT_SINGLEIS:
1757     A->submat_singleis = flg;
1758     break;
1759   case MAT_STRUCTURE_ONLY:
1760     /* The option is handled directly by MatSetOption() */
1761     break;
1762   default:
1763     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1764   }
1765   PetscFunctionReturn(0);
1766 }
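/*
  A minimal usage sketch (editor's addition, not part of PETSc): typical options
  forwarded to both local blocks by the routine above; A is an illustrative
  assumption.

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);    do not stash remote entries
     ierr = MatSetOption(A,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);                        also marks A as symmetric
*/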
1767 
1768 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1769 {
1770   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1771   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1772   PetscErrorCode ierr;
1773   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1774   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1775   PetscInt       *cmap,*idx_p;
1776 
1777   PetscFunctionBegin;
1778   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1779   mat->getrowactive = PETSC_TRUE;
1780 
1781   if (!mat->rowvalues && (idx || v)) {
1782     /*
1783         allocate enough space to hold information from the longest row.
1784     */
1785     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1786     PetscInt   max = 1,tmp;
1787     for (i=0; i<matin->rmap->n; i++) {
1788       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1789       if (max < tmp) max = tmp;
1790     }
1791     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1792   }
1793 
1794   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1795   lrow = row - rstart;
1796 
1797   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1798   if (!v)   {pvA = 0; pvB = 0;}
1799   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1800   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1801   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1802   nztot = nzA + nzB;
1803 
1804   cmap = mat->garray;
1805   if (v  || idx) {
1806     if (nztot) {
1807       /* Sort by increasing column numbers, assuming A and B already sorted */
1808       PetscInt imark = -1;
1809       if (v) {
1810         *v = v_p = mat->rowvalues;
1811         for (i=0; i<nzB; i++) {
1812           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1813           else break;
1814         }
1815         imark = i;
1816         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1817         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1818       }
1819       if (idx) {
1820         *idx = idx_p = mat->rowindices;
1821         if (imark > -1) {
1822           for (i=0; i<imark; i++) {
1823             idx_p[i] = cmap[cworkB[i]];
1824           }
1825         } else {
1826           for (i=0; i<nzB; i++) {
1827             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1828             else break;
1829           }
1830           imark = i;
1831         }
1832         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1833         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1834       }
1835     } else {
1836       if (idx) *idx = 0;
1837       if (v)   *v   = 0;
1838     }
1839   }
1840   *nz  = nztot;
1841   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1842   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1843   PetscFunctionReturn(0);
1844 }
1845 
1846 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1847 {
1848   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1849 
1850   PetscFunctionBegin;
1851   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1852   aij->getrowactive = PETSC_FALSE;
1853   PetscFunctionReturn(0);
1854 }
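/*
  A minimal usage sketch (editor's addition, not part of PETSc): traversing the
  locally owned rows with the get/restore pair above; only local rows may be
  requested. A is an illustrative assumption.

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/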
1855 
1856 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1857 {
1858   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1859   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1860   PetscErrorCode ierr;
1861   PetscInt       i,j,cstart = mat->cmap->rstart;
1862   PetscReal      sum = 0.0;
1863   MatScalar      *v;
1864 
1865   PetscFunctionBegin;
1866   if (aij->size == 1) {
1867     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1868   } else {
1869     if (type == NORM_FROBENIUS) {
1870       v = amat->a;
1871       for (i=0; i<amat->nz; i++) {
1872         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1873       }
1874       v = bmat->a;
1875       for (i=0; i<bmat->nz; i++) {
1876         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1877       }
1878       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1879       *norm = PetscSqrtReal(*norm);
1880       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1881     } else if (type == NORM_1) { /* max column norm */
1882       PetscReal *tmp,*tmp2;
1883       PetscInt  *jj,*garray = aij->garray;
1884       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1885       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1886       *norm = 0.0;
1887       v     = amat->a; jj = amat->j;
1888       for (j=0; j<amat->nz; j++) {
1889         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1890       }
1891       v = bmat->a; jj = bmat->j;
1892       for (j=0; j<bmat->nz; j++) {
1893         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1894       }
1895       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1896       for (j=0; j<mat->cmap->N; j++) {
1897         if (tmp2[j] > *norm) *norm = tmp2[j];
1898       }
1899       ierr = PetscFree(tmp);CHKERRQ(ierr);
1900       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1901       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1902     } else if (type == NORM_INFINITY) { /* max row norm */
1903       PetscReal ntemp = 0.0;
1904       for (j=0; j<aij->A->rmap->n; j++) {
1905         v   = amat->a + amat->i[j];
1906         sum = 0.0;
1907         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1908           sum += PetscAbsScalar(*v); v++;
1909         }
1910         v = bmat->a + bmat->i[j];
1911         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1912           sum += PetscAbsScalar(*v); v++;
1913         }
1914         if (sum > ntemp) ntemp = sum;
1915       }
1916       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1917       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1918     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1919   }
1920   PetscFunctionReturn(0);
1921 }
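/*
  A minimal usage sketch (editor's addition, not part of PETSc): the three norms
  supported above; NORM_2 is not available for parallel AIJ. A is an illustrative
  assumption.

     PetscReal nrm1,nrmf,nrminf;
     ierr = MatNorm(A,NORM_1,&nrm1);CHKERRQ(ierr);            largest column sum of absolute values
     ierr = MatNorm(A,NORM_FROBENIUS,&nrmf);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_INFINITY,&nrminf);CHKERRQ(ierr);   largest row sum of absolute values
*/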
1922 
1923 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1924 {
1925   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1926   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1927   PetscErrorCode ierr;
1928   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1929   PetscInt       cstart = A->cmap->rstart,ncol;
1930   Mat            B;
1931   MatScalar      *array;
1932 
1933   PetscFunctionBegin;
1934   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1935   ai = Aloc->i; aj = Aloc->j;
1936   bi = Bloc->i; bj = Bloc->j;
1937   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1938     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1939     PetscSFNode          *oloc;
1940     PETSC_UNUSED PetscSF sf;
1941 
1942     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1943     /* compute d_nnz for preallocation */
1944     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1945     for (i=0; i<ai[ma]; i++) {
1946       d_nnz[aj[i]]++;
1947       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1948     }
1949     /* compute local off-diagonal contributions */
1950     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1951     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1952     /* map those to global */
1953     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1954     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1955     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1956     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1957     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1958     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1959     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1960 
1961     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1962     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1963     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1964     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1965     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1966     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1967   } else {
1968     B    = *matout;
1969     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1970     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1971   }
1972 
1973   /* copy over the A part */
1974   array = Aloc->a;
1975   row   = A->rmap->rstart;
1976   for (i=0; i<ma; i++) {
1977     ncol = ai[i+1]-ai[i];
1978     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1979     row++;
1980     array += ncol; aj += ncol;
1981   }
1982   aj = Aloc->j;
1983   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */
1984 
1985   /* copy over the B part */
1986   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
1987   array = Bloc->a;
1988   row   = A->rmap->rstart;
1989   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1990   cols_tmp = cols;
1991   for (i=0; i<mb; i++) {
1992     ncol = bi[i+1]-bi[i];
1993     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1994     row++;
1995     array += ncol; cols_tmp += ncol;
1996   }
1997   ierr = PetscFree(cols);CHKERRQ(ierr);
1998 
1999   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2000   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2001   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2002     *matout = B;
2003   } else {
2004     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2005   }
2006   PetscFunctionReturn(0);
2007 }
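/*
  A minimal usage sketch (editor's addition, not part of PETSc): out-of-place and
  in-place transposes handled by the routine above; A is an illustrative assumption.

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);   creates At = A^T
     ierr = MatTranspose(A,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);    replaces A by A^T
     ierr = MatDestroy(&At);CHKERRQ(ierr);
*/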
2008 
2009 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2010 {
2011   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2012   Mat            a    = aij->A,b = aij->B;
2013   PetscErrorCode ierr;
2014   PetscInt       s1,s2,s3;
2015 
2016   PetscFunctionBegin;
2017   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2018   if (rr) {
2019     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2020     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2021     /* Overlap communication with computation. */
2022     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2023   }
2024   if (ll) {
2025     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2026     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2027     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2028   }
2029   /* scale the diagonal block */
2030   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2031 
2032   if (rr) {
2033     /* Do a scatter end and then right scale the off-diagonal block */
2034     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2035     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2036   }
2037   PetscFunctionReturn(0);
2038 }
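/*
  A minimal usage sketch (editor's addition, not part of PETSc): computing
  A <- diag(l) A diag(r) with the routine above; l must match the row layout and
  r the column layout. A is an illustrative assumption.

     Vec l,r;
     ierr = MatCreateVecs(A,&r,&l);CHKERRQ(ierr);   r has the column layout, l the row layout
     ierr = VecSet(l,2.0);CHKERRQ(ierr);
     ierr = VecSet(r,0.5);CHKERRQ(ierr);
     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
     ierr = VecDestroy(&l);CHKERRQ(ierr);
     ierr = VecDestroy(&r);CHKERRQ(ierr);
*/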
2039 
2040 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2041 {
2042   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2043   PetscErrorCode ierr;
2044 
2045   PetscFunctionBegin;
2046   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2047   PetscFunctionReturn(0);
2048 }
2049 
2050 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2051 {
2052   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2053   Mat            a,b,c,d;
2054   PetscBool      flg;
2055   PetscErrorCode ierr;
2056 
2057   PetscFunctionBegin;
2058   a = matA->A; b = matA->B;
2059   c = matB->A; d = matB->B;
2060 
2061   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2062   if (flg) {
2063     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2064   }
2065   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2066   PetscFunctionReturn(0);
2067 }
2068 
2069 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2070 {
2071   PetscErrorCode ierr;
2072   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2073   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2074 
2075   PetscFunctionBegin;
2076   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2077   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2078     /* because of the column compression in the off-processor part of the matrix a->B,
2079        the number of columns in a->B and b->B may be different, hence we cannot call
2080        MatCopy() directly on the two parts. If need be, a more efficient copy than
2081        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
2082        then copying the submatrices */
2083     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2084   } else {
2085     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2086     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2087   }
2088   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2089   PetscFunctionReturn(0);
2090 }
2091 
2092 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2093 {
2094   PetscErrorCode ierr;
2095 
2096   PetscFunctionBegin;
2097   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2098   PetscFunctionReturn(0);
2099 }
2100 
2101 /*
2102    Computes the number of nonzeros per row needed for preallocation when X and Y
2103    have different nonzero structure.
2104 */
2105 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2106 {
2107   PetscInt       i,j,k,nzx,nzy;
2108 
2109   PetscFunctionBegin;
2110   /* Set the number of nonzeros in the new matrix */
2111   for (i=0; i<m; i++) {
2112     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2113     nzx = xi[i+1] - xi[i];
2114     nzy = yi[i+1] - yi[i];
2115     nnz[i] = 0;
2116     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2117       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2118       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2119       nnz[i]++;
2120     }
2121     for (; k<nzy; k++) nnz[i]++;
2122   }
2123   PetscFunctionReturn(0);
2124 }
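/*
  A worked example (editor's addition, not part of PETSc) of the merge count above:
  with one row whose X columns are {0,3,5} and Y columns are {1,3}, already in
  global numbering so the identity local-to-global maps suffice, the union is
  {0,1,3,5} and the routine returns nnz[0] = 4.

     PetscInt xi[]   = {0,3},  xj[] = {0,3,5};
     PetscInt yi[]   = {0,2},  yj[] = {1,3};
     PetscInt ltog[] = {0,1,2,3,4,5};
     PetscInt nnz[1];
     ierr = MatAXPYGetPreallocation_MPIX_private(1,xi,xj,ltog,yi,yj,ltog,nnz);CHKERRQ(ierr);
*/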
2125 
2126 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2127 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2128 {
2129   PetscErrorCode ierr;
2130   PetscInt       m = Y->rmap->N;
2131   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2132   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2133 
2134   PetscFunctionBegin;
2135   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2136   PetscFunctionReturn(0);
2137 }
2138 
2139 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2140 {
2141   PetscErrorCode ierr;
2142   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2143   PetscBLASInt   bnz,one=1;
2144   Mat_SeqAIJ     *x,*y;
2145 
2146   PetscFunctionBegin;
2147   if (str == SAME_NONZERO_PATTERN) {
2148     PetscScalar alpha = a;
2149     x    = (Mat_SeqAIJ*)xx->A->data;
2150     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2151     y    = (Mat_SeqAIJ*)yy->A->data;
2152     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2153     x    = (Mat_SeqAIJ*)xx->B->data;
2154     y    = (Mat_SeqAIJ*)yy->B->data;
2155     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2156     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2157     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2158   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2159     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2160   } else {
2161     Mat      B;
2162     PetscInt *nnz_d,*nnz_o;
2163     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2164     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2165     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2166     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2167     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2168     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2169     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2170     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2171     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2172     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2173     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2174     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2175     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2176     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2177   }
2178   PetscFunctionReturn(0);
2179 }
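/*
  A minimal usage sketch (editor's addition, not part of PETSc): Y <- Y + 2*X via
  the routine above; with DIFFERENT_NONZERO_PATTERN the preallocation helpers above
  size a fresh matrix that replaces Y. X and Y are illustrative assumptions.

     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/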
2180 
2181 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2182 
2183 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2184 {
2185 #if defined(PETSC_USE_COMPLEX)
2186   PetscErrorCode ierr;
2187   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2188 
2189   PetscFunctionBegin;
2190   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2191   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2192 #else
2193   PetscFunctionBegin;
2194 #endif
2195   PetscFunctionReturn(0);
2196 }
2197 
2198 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2199 {
2200   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2201   PetscErrorCode ierr;
2202 
2203   PetscFunctionBegin;
2204   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2205   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2206   PetscFunctionReturn(0);
2207 }
2208 
2209 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2210 {
2211   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2212   PetscErrorCode ierr;
2213 
2214   PetscFunctionBegin;
2215   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2216   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2217   PetscFunctionReturn(0);
2218 }
2219 
2220 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2221 {
2222   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2223   PetscErrorCode ierr;
2224   PetscInt       i,*idxb = 0;
2225   PetscScalar    *va,*vb;
2226   Vec            vtmp;
2227 
2228   PetscFunctionBegin;
2229   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2230   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2231   if (idx) {
2232     for (i=0; i<A->rmap->n; i++) {
2233       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2234     }
2235   }
2236 
2237   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2238   if (idx) {
2239     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2240   }
2241   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2242   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2243 
2244   for (i=0; i<A->rmap->n; i++) {
2245     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2246       va[i] = vb[i];
2247       if (idx) idx[i] = a->garray[idxb[i]];
2248     }
2249   }
2250 
2251   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2252   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2253   ierr = PetscFree(idxb);CHKERRQ(ierr);
2254   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2255   PetscFunctionReturn(0);
2256 }
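/*
  A minimal usage sketch (editor's addition, not part of PETSc): the largest
  absolute entry of each local row together with the global column index where it
  occurs; A is an illustrative assumption.

     Vec      rowmax;
     PetscInt *loc,n;
     ierr = MatCreateVecs(A,NULL,&rowmax);CHKERRQ(ierr);
     ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr);
     ierr = PetscMalloc1(n,&loc);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(A,rowmax,loc);CHKERRQ(ierr);
     ierr = PetscFree(loc);CHKERRQ(ierr);
     ierr = VecDestroy(&rowmax);CHKERRQ(ierr);
*/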
2257 
2258 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2259 {
2260   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2261   PetscErrorCode ierr;
2262   PetscInt       i,*idxb = 0;
2263   PetscScalar    *va,*vb;
2264   Vec            vtmp;
2265 
2266   PetscFunctionBegin;
2267   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2268   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2269   if (idx) {
2270     for (i=0; i<A->rmap->n; i++) {
2271       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2272     }
2273   }
2274 
2275   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2276   if (idx) {
2277     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2278   }
2279   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2280   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2281 
2282   for (i=0; i<A->rmap->n; i++) {
2283     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2284       va[i] = vb[i];
2285       if (idx) idx[i] = a->garray[idxb[i]];
2286     }
2287   }
2288 
2289   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2290   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2291   ierr = PetscFree(idxb);CHKERRQ(ierr);
2292   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2293   PetscFunctionReturn(0);
2294 }
2295 
2296 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2297 {
2298   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2299   PetscInt       n      = A->rmap->n;
2300   PetscInt       cstart = A->cmap->rstart;
2301   PetscInt       *cmap  = mat->garray;
2302   PetscInt       *diagIdx, *offdiagIdx;
2303   Vec            diagV, offdiagV;
2304   PetscScalar    *a, *diagA, *offdiagA;
2305   PetscInt       r;
2306   PetscErrorCode ierr;
2307 
2308   PetscFunctionBegin;
2309   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2310   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2311   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2312   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2313   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2314   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2315   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2316   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2317   for (r = 0; r < n; ++r) {
2318     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2319       a[r]   = diagA[r];
2320       idx[r] = cstart + diagIdx[r];
2321     } else {
2322       a[r]   = offdiagA[r];
2323       idx[r] = cmap[offdiagIdx[r]];
2324     }
2325   }
2326   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2327   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2328   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2329   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2330   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2331   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2332   PetscFunctionReturn(0);
2333 }
2334 
2335 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2336 {
2337   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2338   PetscInt       n      = A->rmap->n;
2339   PetscInt       cstart = A->cmap->rstart;
2340   PetscInt       *cmap  = mat->garray;
2341   PetscInt       *diagIdx, *offdiagIdx;
2342   Vec            diagV, offdiagV;
2343   PetscScalar    *a, *diagA, *offdiagA;
2344   PetscInt       r;
2345   PetscErrorCode ierr;
2346 
2347   PetscFunctionBegin;
2348   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2349   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2350   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2351   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2352   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2353   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2354   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2355   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2356   for (r = 0; r < n; ++r) {
2357     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2358       a[r]   = diagA[r];
2359       idx[r] = cstart + diagIdx[r];
2360     } else {
2361       a[r]   = offdiagA[r];
2362       idx[r] = cmap[offdiagIdx[r]];
2363     }
2364   }
2365   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2366   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2367   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2368   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2369   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2370   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2371   PetscFunctionReturn(0);
2372 }
2373 
2374 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2375 {
2376   PetscErrorCode ierr;
2377   Mat            *dummy;
2378 
2379   PetscFunctionBegin;
2380   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2381   *newmat = *dummy;
2382   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2383   PetscFunctionReturn(0);
2384 }
2385 
2386 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2387 {
2388   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2389   PetscErrorCode ierr;
2390 
2391   PetscFunctionBegin;
2392   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2393   A->factorerrortype = a->A->factorerrortype;
2394   PetscFunctionReturn(0);
2395 }
2396 
2397 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2398 {
2399   PetscErrorCode ierr;
2400   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2401 
2402   PetscFunctionBegin;
2403   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2404   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2405   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2406   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2407   PetscFunctionReturn(0);
2408 }
2409 
2410 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2411 {
2412   PetscFunctionBegin;
2413   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2414   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2415   PetscFunctionReturn(0);
2416 }
2417 
2418 /*@
2419    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap
2420 
2421    Collective on Mat
2422 
2423    Input Parameters:
2424 +    A - the matrix
2425 -    sc - PETSC_TRUE indicates that the scalable algorithm should be used (the default is the non-scalable algorithm)
2426 
2427    Level: advanced
2428 
2429 @*/
2430 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2431 {
2432   PetscErrorCode       ierr;
2433 
2434   PetscFunctionBegin;
2435   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2436   PetscFunctionReturn(0);
2437 }
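/*
  A minimal usage sketch (editor's addition, not part of PETSc): selecting the
  scalable overlap algorithm either in code or from the options database; A is an
  illustrative assumption.

     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);

  or, once MatSetFromOptions(A) is called, on the command line:

     -mat_increase_overlap_scalable
*/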
2438 
2439 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2440 {
2441   PetscErrorCode       ierr;
2442   PetscBool            sc = PETSC_FALSE,flg;
2443 
2444   PetscFunctionBegin;
2445   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2447   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2448   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2449   if (flg) {
2450     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2451   }
2452   ierr = PetscOptionsTail();CHKERRQ(ierr);
2453   PetscFunctionReturn(0);
2454 }
2455 
2456 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2457 {
2458   PetscErrorCode ierr;
2459   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2460   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2461 
2462   PetscFunctionBegin;
2463   if (!Y->preallocated) {
2464     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2465   } else if (!aij->nz) {
2466     PetscInt nonew = aij->nonew;
2467     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2468     aij->nonew = nonew;
2469   }
2470   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2471   PetscFunctionReturn(0);
2472 }
2473 
2474 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2475 {
2476   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2477   PetscErrorCode ierr;
2478 
2479   PetscFunctionBegin;
2480   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2481   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2482   if (d) {
2483     PetscInt rstart;
2484     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2485     *d += rstart;
2486 
2487   }
2488   PetscFunctionReturn(0);
2489 }
2490 
2491 
2492 /* -------------------------------------------------------------------*/
2493 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2494                                        MatGetRow_MPIAIJ,
2495                                        MatRestoreRow_MPIAIJ,
2496                                        MatMult_MPIAIJ,
2497                                 /* 4*/ MatMultAdd_MPIAIJ,
2498                                        MatMultTranspose_MPIAIJ,
2499                                        MatMultTransposeAdd_MPIAIJ,
2500                                        0,
2501                                        0,
2502                                        0,
2503                                 /*10*/ 0,
2504                                        0,
2505                                        0,
2506                                        MatSOR_MPIAIJ,
2507                                        MatTranspose_MPIAIJ,
2508                                 /*15*/ MatGetInfo_MPIAIJ,
2509                                        MatEqual_MPIAIJ,
2510                                        MatGetDiagonal_MPIAIJ,
2511                                        MatDiagonalScale_MPIAIJ,
2512                                        MatNorm_MPIAIJ,
2513                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2514                                        MatAssemblyEnd_MPIAIJ,
2515                                        MatSetOption_MPIAIJ,
2516                                        MatZeroEntries_MPIAIJ,
2517                                 /*24*/ MatZeroRows_MPIAIJ,
2518                                        0,
2519                                        0,
2520                                        0,
2521                                        0,
2522                                 /*29*/ MatSetUp_MPIAIJ,
2523                                        0,
2524                                        0,
2525                                        MatGetDiagonalBlock_MPIAIJ,
2526                                        0,
2527                                 /*34*/ MatDuplicate_MPIAIJ,
2528                                        0,
2529                                        0,
2530                                        0,
2531                                        0,
2532                                 /*39*/ MatAXPY_MPIAIJ,
2533                                        MatCreateSubMatrices_MPIAIJ,
2534                                        MatIncreaseOverlap_MPIAIJ,
2535                                        MatGetValues_MPIAIJ,
2536                                        MatCopy_MPIAIJ,
2537                                 /*44*/ MatGetRowMax_MPIAIJ,
2538                                        MatScale_MPIAIJ,
2539                                        MatShift_MPIAIJ,
2540                                        MatDiagonalSet_MPIAIJ,
2541                                        MatZeroRowsColumns_MPIAIJ,
2542                                 /*49*/ MatSetRandom_MPIAIJ,
2543                                        0,
2544                                        0,
2545                                        0,
2546                                        0,
2547                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2548                                        0,
2549                                        MatSetUnfactored_MPIAIJ,
2550                                        MatPermute_MPIAIJ,
2551                                        0,
2552                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2553                                        MatDestroy_MPIAIJ,
2554                                        MatView_MPIAIJ,
2555                                        0,
2556                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2557                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2558                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2559                                        0,
2560                                        0,
2561                                        0,
2562                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2563                                        MatGetRowMinAbs_MPIAIJ,
2564                                        0,
2565                                        0,
2566                                        0,
2567                                        0,
2568                                 /*75*/ MatFDColoringApply_AIJ,
2569                                        MatSetFromOptions_MPIAIJ,
2570                                        0,
2571                                        0,
2572                                        MatFindZeroDiagonals_MPIAIJ,
2573                                 /*80*/ 0,
2574                                        0,
2575                                        0,
2576                                 /*83*/ MatLoad_MPIAIJ,
2577                                        MatIsSymmetric_MPIAIJ,
2578                                        0,
2579                                        0,
2580                                        0,
2581                                        0,
2582                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2583                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2584                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2585                                        MatPtAP_MPIAIJ_MPIAIJ,
2586                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2587                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2588                                        0,
2589                                        0,
2590                                        0,
2591                                        0,
2592                                 /*99*/ 0,
2593                                        0,
2594                                        0,
2595                                        MatConjugate_MPIAIJ,
2596                                        0,
2597                                 /*104*/MatSetValuesRow_MPIAIJ,
2598                                        MatRealPart_MPIAIJ,
2599                                        MatImaginaryPart_MPIAIJ,
2600                                        0,
2601                                        0,
2602                                 /*109*/0,
2603                                        0,
2604                                        MatGetRowMin_MPIAIJ,
2605                                        0,
2606                                        MatMissingDiagonal_MPIAIJ,
2607                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2608                                        0,
2609                                        MatGetGhosts_MPIAIJ,
2610                                        0,
2611                                        0,
2612                                 /*119*/0,
2613                                        0,
2614                                        0,
2615                                        0,
2616                                        MatGetMultiProcBlock_MPIAIJ,
2617                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2618                                        MatGetColumnNorms_MPIAIJ,
2619                                        MatInvertBlockDiagonal_MPIAIJ,
2620                                        0,
2621                                        MatCreateSubMatricesMPI_MPIAIJ,
2622                                 /*129*/0,
2623                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2624                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2625                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2626                                        0,
2627                                 /*134*/0,
2628                                        0,
2629                                        MatRARt_MPIAIJ_MPIAIJ,
2630                                        0,
2631                                        0,
2632                                 /*139*/MatSetBlockSizes_MPIAIJ,
2633                                        0,
2634                                        0,
2635                                        MatFDColoringSetUp_MPIXAIJ,
2636                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2637                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2638 };
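
/* A zero entry in the function table above means the corresponding MatXXX() operation
   is not provided by the MPIAIJ type */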
2639 
2640 /* ----------------------------------------------------------------------------------------*/
2641 
2642 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2643 {
2644   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2645   PetscErrorCode ierr;
2646 
2647   PetscFunctionBegin;
2648   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2649   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2650   PetscFunctionReturn(0);
2651 }
2652 
2653 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2654 {
2655   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2656   PetscErrorCode ierr;
2657 
2658   PetscFunctionBegin;
2659   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2660   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2661   PetscFunctionReturn(0);
2662 }
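
/*
   The two routines above simply forward to the sequential diagonal (A) and
   off-diagonal (B) blocks. A typical user-level pattern is sketched here
   (MatStoreValues() requires that the nonzero structure be frozen first):

     ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);     <-- save the current numerical values
     (modify entries with MatSetValues() and reassemble)
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);  <-- restore the saved values
*/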
2663 
2664 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2665 {
2666   Mat_MPIAIJ     *b;
2667   PetscErrorCode ierr;
2668 
2669   PetscFunctionBegin;
2670   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2671   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2672   b = (Mat_MPIAIJ*)B->data;
2673 
2674 #if defined(PETSC_USE_CTABLE)
2675   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2676 #else
2677   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2678 #endif
2679   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2680   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2681   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2682 
2683   /* Because B may have been resized we simply destroy it and create a new one each time */
2684   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2685   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2686   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2687   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2688   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2689   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2690 
2691   if (!B->preallocated) {
2692     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2693     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2694     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2695     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2696     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2697   }
2698 
2699   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2700   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2701   B->preallocated  = PETSC_TRUE;
2702   B->was_assembled = PETSC_FALSE;
2703   B->assembled     = PETSC_FALSE;
2704   PetscFunctionReturn(0);
2705 }
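
/*
   User-level preallocation sketch (illustrative numbers only): for a matrix with at
   most 5 nonzeros per row in the diagonal block and at most 2 in the off-diagonal block,

     ierr = MatMPIAIJSetPreallocation(B,5,NULL,2,NULL);CHKERRQ(ierr);

   or pass d_nnz[]/o_nnz[] arrays of length B->rmap->n for exact per-row counts.
*/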
2706 
2707 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2708 {
2709   Mat_MPIAIJ     *b;
2710   PetscErrorCode ierr;
2711 
2712   PetscFunctionBegin;
2713   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2714   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2715   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2716   b = (Mat_MPIAIJ*)B->data;
2717 
2718 #if defined(PETSC_USE_CTABLE)
2719   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2720 #else
2721   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2722 #endif
2723   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2724   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2725   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2726 
2727   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2728   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2729   B->preallocated  = PETSC_TRUE;
2730   B->was_assembled = PETSC_FALSE;
2731   B->assembled = PETSC_FALSE;
2732   PetscFunctionReturn(0);
2733 }
2734 
2735 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2736 {
2737   Mat            mat;
2738   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2739   PetscErrorCode ierr;
2740 
2741   PetscFunctionBegin;
2742   *newmat = 0;
2743   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2744   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2745   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2746   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2747   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2748   a       = (Mat_MPIAIJ*)mat->data;
2749 
2750   mat->factortype   = matin->factortype;
2751   mat->assembled    = PETSC_TRUE;
2752   mat->insertmode   = NOT_SET_VALUES;
2753   mat->preallocated = PETSC_TRUE;
2754 
2755   a->size         = oldmat->size;
2756   a->rank         = oldmat->rank;
2757   a->donotstash   = oldmat->donotstash;
2758   a->roworiented  = oldmat->roworiented;
2759   a->rowindices   = 0;
2760   a->rowvalues    = 0;
2761   a->getrowactive = PETSC_FALSE;
2762 
2763   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2764   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2765 
2766   if (oldmat->colmap) {
2767 #if defined(PETSC_USE_CTABLE)
2768     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2769 #else
2770     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2771     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2772     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2773 #endif
2774   } else a->colmap = 0;
2775   if (oldmat->garray) {
2776     PetscInt len;
2777     len  = oldmat->B->cmap->n;
2778     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2779     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2780     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2781   } else a->garray = 0;
2782 
2783   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2784   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2785   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2786   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2787 
2788   if (oldmat->Mvctx_mpi1) {
2789     ierr    = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2790     ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2791   }
2792 
2793   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2794   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2795   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2796   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2797   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2798   *newmat = mat;
2799   PetscFunctionReturn(0);
2800 }
2801 
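/*
   Loads a matrix stored in the PETSc binary format: as read below, the file carries a
   four-entry header {MAT_FILE_CLASSID, M, N, nnz}, followed by the M row lengths
   (PETSC_INT), all column indices (PETSC_INT), and all numerical values (PETSC_SCALAR).
   Rank 0 reads each piece and ships the relevant portion to the owning process.
*/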
2802 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2803 {
2804   PetscScalar    *vals,*svals;
2805   MPI_Comm       comm;
2806   PetscErrorCode ierr;
2807   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2808   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2809   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2810   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2811   PetscInt       cend,cstart,n,*rowners;
2812   int            fd;
2813   PetscInt       bs = newMat->rmap->bs;
2814 
2815   PetscFunctionBegin;
2816   /* force binary viewer to load .info file if it has not yet done so */
2817   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2818   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2819   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2820   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2821   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2822   if (!rank) {
2823     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2824     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2825     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2826   }
2827 
2828   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2829   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2830   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2831   if (bs < 0) bs = 1;
2832 
2833   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2834   M    = header[1]; N = header[2];
2835 
2836   /* If global sizes are set, check that they are consistent with those given in the file */
2837   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2838   if (newMat->cmap->N >= 0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2839 
2840   /* determine ownership of all (block) rows */
2841   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2842   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2843   else m = newMat->rmap->n; /* Set by user */
2844 
2845   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2846   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2847 
2848   /* First process needs enough room for process with most rows */
2849   if (!rank) {
2850     mmax = rowners[1];
2851     for (i=2; i<=size; i++) {
2852       mmax = PetscMax(mmax, rowners[i]);
2853     }
2854   } else mmax = -1;             /* unused, but compilers complain */
2855 
2856   rowners[0] = 0;
2857   for (i=2; i<=size; i++) {
2858     rowners[i] += rowners[i-1];
2859   }
2860   rstart = rowners[rank];
2861   rend   = rowners[rank+1];
2862 
2863   /* distribute row lengths to all processors */
2864   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2865   if (!rank) {
2866     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2867     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2868     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2869     for (j=0; j<m; j++) {
2870       procsnz[0] += ourlens[j];
2871     }
2872     for (i=1; i<size; i++) {
2873       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2874       /* calculate the number of nonzeros on each processor */
2875       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2876         procsnz[i] += rowlengths[j];
2877       }
2878       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2879     }
2880     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2881   } else {
2882     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2883   }
2884 
2885   if (!rank) {
2886     /* determine max buffer needed and allocate it */
2887     maxnz = 0;
2888     for (i=0; i<size; i++) {
2889       maxnz = PetscMax(maxnz,procsnz[i]);
2890     }
2891     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2892 
2893     /* read in my part of the matrix column indices  */
2894     nz   = procsnz[0];
2895     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2896     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2897 
2898     /* read in everyone else's parts and ship them off */
2899     for (i=1; i<size; i++) {
2900       nz   = procsnz[i];
2901       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2902       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2903     }
2904     ierr = PetscFree(cols);CHKERRQ(ierr);
2905   } else {
2906     /* determine buffer space needed for message */
2907     nz = 0;
2908     for (i=0; i<m; i++) {
2909       nz += ourlens[i];
2910     }
2911     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2912 
2913     /* receive message of column indices */
2914     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2915   }
2916 
2917   /* determine column ownership if matrix is not square */
2918   if (N != M) {
2919     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2920     else n = newMat->cmap->n;
2921     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2922     cstart = cend - n;
2923   } else {
2924     cstart = rstart;
2925     cend   = rend;
2926     n      = cend - cstart;
2927   }
2928 
2929   /* loop over local rows, determining the number of off-diagonal entries */
2930   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2931   jj   = 0;
2932   for (i=0; i<m; i++) {
2933     for (j=0; j<ourlens[i]; j++) {
2934       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2935       jj++;
2936     }
2937   }
2938 
2939   for (i=0; i<m; i++) {
2940     ourlens[i] -= offlens[i];
2941   }
2942   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2943 
2944   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2945 
2946   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2947 
2948   for (i=0; i<m; i++) {
2949     ourlens[i] += offlens[i];
2950   }
2951 
2952   if (!rank) {
2953     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
2954 
2955     /* read in my part of the matrix numerical values  */
2956     nz   = procsnz[0];
2957     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2958 
2959     /* insert into matrix */
2960     jj      = rstart;
2961     smycols = mycols;
2962     svals   = vals;
2963     for (i=0; i<m; i++) {
2964       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2965       smycols += ourlens[i];
2966       svals   += ourlens[i];
2967       jj++;
2968     }
2969 
2970     /* read in other processors and ship out */
2971     for (i=1; i<size; i++) {
2972       nz   = procsnz[i];
2973       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2974       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2975     }
2976     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2977   } else {
2978     /* receive numeric values */
2979     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
2980 
2981     /* receive message of values */
2982     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2983 
2984     /* insert into matrix */
2985     jj      = rstart;
2986     smycols = mycols;
2987     svals   = vals;
2988     for (i=0; i<m; i++) {
2989       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2990       smycols += ourlens[i];
2991       svals   += ourlens[i];
2992       jj++;
2993     }
2994   }
2995   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2996   ierr = PetscFree(vals);CHKERRQ(ierr);
2997   ierr = PetscFree(mycols);CHKERRQ(ierr);
2998   ierr = PetscFree(rowners);CHKERRQ(ierr);
2999   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3000   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3001   PetscFunctionReturn(0);
3002 }
3003 
3004 /* Not scalable because of ISAllGather() unless getting all columns. */
3005 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3006 {
3007   PetscErrorCode ierr;
3008   IS             iscol_local;
3009   PetscBool      isstride;
3010   PetscMPIInt    lisstride=0,gisstride;
3011 
3012   PetscFunctionBegin;
3013   /* check if we are grabbing all columns */
3014   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3015 
3016   if (isstride) {
3017     PetscInt  start,len,mstart,mlen;
3018     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3019     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3020     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3021     if (mstart == start && mlen-mstart == len) lisstride = 1;
3022   }
3023 
3024   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3025   if (gisstride) {
3026     PetscInt N;
3027     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3028     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3029     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3030     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3031   } else {
3032     PetscInt cbs;
3033     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3034     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3035     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3036   }
3037 
3038   *isseq = iscol_local;
3039   PetscFunctionReturn(0);
3040 }
3041 
3042 /*
3043  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3044  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3045 
3046  Input Parameters:
3047    mat - matrix
3048    isrow - parallel row index set; its local indices are a subset of local rows of mat,
3049            i.e., mat->rstart <= isrow[i] < mat->rend
3050    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3051            i.e., mat->cstart <= iscol[i] < mat->cend
3052  Output Parameter:
3053    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3054    iscol_o - sequential column index set for retrieving mat->B
3055    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3056  */
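/*
   Illustration (a hypothetical two-process layout): suppose mat owns columns [0,5) on
   rank 0 and [5,10) on rank 1, rank 0 selects iscol = {1,3} and rank 1 selects
   iscol = {7}. On rank 0 the routine returns iscol_d = {1,3} (local column indices
   into mat->A); if global column 7 appears among the off-diagonal columns of rank 0's
   mat->B, iscol_o holds its local position in B and garray[0] = 2, the position of
   column 7 within the concatenated iscol.
*/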
3057 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3058 {
3059   PetscErrorCode ierr;
3060   Vec            x,cmap;
3061   const PetscInt *is_idx;
3062   PetscScalar    *xarray,*cmaparray;
3063   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3064   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3065   Mat            B=a->B;
3066   Vec            lvec=a->lvec,lcmap;
3067   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3068   MPI_Comm       comm;
3069   VecScatter     Mvctx=a->Mvctx;
3070 
3071   PetscFunctionBegin;
3072   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3073   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3074 
3075   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3076   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3077   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3078   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3079   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3080 
3081   /* Get start indices */
3082   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3083   isstart -= ncols;
3084   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3085 
3086   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3087   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3088   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3089   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3090   for (i=0; i<ncols; i++) {
3091     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3092     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3093     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3094   }
3095   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3096   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3097   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3098 
3099   /* Get iscol_d */
3100   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3101   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3102   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3103 
3104   /* Get isrow_d */
3105   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3106   rstart = mat->rmap->rstart;
3107   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3108   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3109   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3110   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3111 
3112   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3113   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3114   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3115 
3116   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3117   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3118   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3119 
3120   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3121 
3122   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3123   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3124 
3125   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3126   /* off-process column indices */
3127   count = 0;
3128   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3129   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3130 
3131   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3132   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3133   for (i=0; i<Bn; i++) {
3134     if (PetscRealPart(xarray[i]) > -1.0) {
3135       idx[count]     = i;                   /* local column index in off-diagonal part B */
3136       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3137       count++;
3138     }
3139   }
3140   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3141   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3142 
3143   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3144   /* cannot ensure iscol_o has same blocksize as iscol! */
3145 
3146   ierr = PetscFree(idx);CHKERRQ(ierr);
3147   *garray = cmap1;
3148 
3149   ierr = VecDestroy(&x);CHKERRQ(ierr);
3150   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3151   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3152   PetscFunctionReturn(0);
3153 }
3154 
3155 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3156 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3157 {
3158   PetscErrorCode ierr;
3159   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3160   Mat            M = NULL;
3161   MPI_Comm       comm;
3162   IS             iscol_d,isrow_d,iscol_o;
3163   Mat            Asub = NULL,Bsub = NULL;
3164   PetscInt       n;
3165 
3166   PetscFunctionBegin;
3167   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3168 
3169   if (call == MAT_REUSE_MATRIX) {
3170     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3171     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3172     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3173 
3174     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3175     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3176 
3177     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3178     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3179 
3180     /* Update diagonal and off-diagonal portions of submat */
3181     asub = (Mat_MPIAIJ*)(*submat)->data;
3182     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3183     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3184     if (n) {
3185       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3186     }
3187     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3188     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3189 
3190   } else { /* call == MAT_INITIAL_MATRIX */
3191     const PetscInt *garray;
3192     PetscInt        BsubN;
3193 
3194     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3195     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3196 
3197     /* Create local submatrices Asub and Bsub */
3198     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3199     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3200 
3201     /* Create submatrix M */
3202     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3203 
3204     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3205     asub = (Mat_MPIAIJ*)M->data;
3206 
3207     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3208     n = asub->B->cmap->N;
3209     if (BsubN > n) {
3210       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3211       const PetscInt *idx;
3212       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3213       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3214 
3215       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3216       j = 0;
3217       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3218       for (i=0; i<n; i++) {
3219         if (j >= BsubN) break;
3220         while (subgarray[i] > garray[j]) j++;
3221 
3222         if (subgarray[i] == garray[j]) {
3223           idx_new[i] = idx[j++];
3224       } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be less than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3225       }
3226       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3227 
3228       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3229       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3230 
3231     } else if (BsubN < n) {
3232       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be smaller than columns of B (%D)",BsubN,asub->B->cmap->N);
3233     }
3234 
3235     ierr = PetscFree(garray);CHKERRQ(ierr);
3236     *submat = M;
3237 
3238     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3239     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3240     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3241 
3242     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3243     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3244 
3245     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3246     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3247   }
3248   PetscFunctionReturn(0);
3249 }
3250 
3251 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3252 {
3253   PetscErrorCode ierr;
3254   IS             iscol_local=NULL,isrow_d;
3255   PetscInt       csize;
3256   PetscInt       n,i,j,start,end;
3257   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3258   MPI_Comm       comm;
3259 
3260   PetscFunctionBegin;
3261   /* If isrow has same processor distribution as mat,
3262      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3263   if (call == MAT_REUSE_MATRIX) {
3264     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3265     if (isrow_d) {
3266       sameRowDist  = PETSC_TRUE;
3267       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3268     } else {
3269       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3270       if (iscol_local) {
3271         sameRowDist  = PETSC_TRUE;
3272         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3273       }
3274     }
3275   } else {
3276     /* Check if isrow has same processor distribution as mat */
3277     sameDist[0] = PETSC_FALSE;
3278     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3279     if (!n) {
3280       sameDist[0] = PETSC_TRUE;
3281     } else {
3282       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3283       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3284       if (i >= start && j < end) {
3285         sameDist[0] = PETSC_TRUE;
3286       }
3287     }
3288 
3289     /* Check if iscol has same processor distribution as mat */
3290     sameDist[1] = PETSC_FALSE;
3291     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3292     if (!n) {
3293       sameDist[1] = PETSC_TRUE;
3294     } else {
3295       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3296       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3297       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3298     }
3299 
3300     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3301     ierr = MPIU_Allreduce(sameDist,tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3302     sameRowDist = tsameDist[0];
3303   }
3304 
3305   if (sameRowDist) {
3306     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3307       /* isrow and iscol have same processor distribution as mat */
3308       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3309       PetscFunctionReturn(0);
3310     } else { /* sameRowDist */
3311       /* isrow has same processor distribution as mat */
3312       if (call == MAT_INITIAL_MATRIX) {
3313         PetscBool sorted;
3314         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3315         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3316         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3317         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3318 
3319         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3320         if (sorted) {
3321           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3322           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3323           PetscFunctionReturn(0);
3324         }
3325       } else { /* call == MAT_REUSE_MATRIX */
3326         IS    iscol_sub;
3327         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3328         if (iscol_sub) {
3329           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3330           PetscFunctionReturn(0);
3331         }
3332       }
3333     }
3334   }
3335 
3336   /* General case: iscol -> iscol_local which has global size of iscol */
3337   if (call == MAT_REUSE_MATRIX) {
3338     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3339     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3340   } else {
3341     if (!iscol_local) {
3342       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3343     }
3344   }
3345 
3346   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3347   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3348 
3349   if (call == MAT_INITIAL_MATRIX) {
3350     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3351     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3352   }
3353   PetscFunctionReturn(0);
3354 }
3355 
3356 /*@C
3357      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3358          and "off-diagonal" part of the matrix in CSR format.
3359 
3360    Collective on MPI_Comm
3361 
3362    Input Parameters:
3363 +  comm - MPI communicator
3364 .  A - "diagonal" portion of matrix
3365 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3366 -  garray - global index of B columns
3367 
3368    Output Parameter:
3369 .  mat - the matrix, with input A as its local diagonal matrix

3370    Level: advanced
3371 
3372    Notes:
3373        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3374        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
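
       Example usage (a sketch; error checking omitted):
$      Mat A,B,mat;
$      ... create sequential A (diagonal block) and B (off-diagonal block), e.g. with MatCreateSeqAIJ() ...
$      ierr = MatCreateMPIAIJWithSeqAIJ(comm,A,B,garray,&mat);
$      ... A and B now belong to mat and must not be used or destroyed by the caller ...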
3375 
3376 .seealso: MatCreateMPIAIJWithSplitArrays()
3377 @*/
3378 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3379 {
3380   PetscErrorCode ierr;
3381   Mat_MPIAIJ     *maij;
3382   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3383   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3384   PetscScalar    *oa=b->a;
3385   Mat            Bnew;
3386   PetscInt       m,n,N;
3387 
3388   PetscFunctionBegin;
3389   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3390   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3391   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3392   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3393   /* the check below is intentionally disabled: when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3394   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3395 
3396   /* Get global columns of mat */
3397   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3398 
3399   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3400   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3401   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3402   maij = (Mat_MPIAIJ*)(*mat)->data;
3403 
3404   (*mat)->preallocated = PETSC_TRUE;
3405 
3406   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3407   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3408 
3409   /* Set A as diagonal portion of *mat */
3410   maij->A = A;
3411 
3412   nz = oi[m];
3413   for (i=0; i<nz; i++) {
3414     col   = oj[i];
3415     oj[i] = garray[col];
3416   }
3417 
3418   /* Set Bnew as off-diagonal portion of *mat */
3419   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3420   bnew        = (Mat_SeqAIJ*)Bnew->data;
3421   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3422   maij->B     = Bnew;
3423 
3424   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3425 
3426   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3427   b->free_a       = PETSC_FALSE;
3428   b->free_ij      = PETSC_FALSE;
3429   ierr = MatDestroy(&B);CHKERRQ(ierr);
3430 
3431   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3432   bnew->free_a       = PETSC_TRUE;
3433   bnew->free_ij      = PETSC_TRUE;
3434 
3435   /* condense columns of maij->B */
3436   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3437   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3438   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3439   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3440   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3441   PetscFunctionReturn(0);
3442 }
3443 
3444 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3445 
3446 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3447 {
3448   PetscErrorCode ierr;
3449   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3450   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3451   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3452   Mat            M,Msub,B=a->B;
3453   MatScalar      *aa;
3454   Mat_SeqAIJ     *aij;
3455   PetscInt       *garray = a->garray,*colsub,Ncols;
3456   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3457   IS             iscol_sub,iscmap;
3458   const PetscInt *is_idx,*cmap;
3459   PetscBool      allcolumns=PETSC_FALSE;
3460   MPI_Comm       comm;
3461 
3462   PetscFunctionBegin;
3463   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3464 
3465   if (call == MAT_REUSE_MATRIX) {
3466     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3467     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3468     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3469 
3470     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3471     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3472 
3473     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3474     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3475 
3476     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3477 
3478   } else { /* call == MAT_INITIAL_MATRIX */
3479     PetscBool flg;
3480 
3481     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3482     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3483 
3484     /* (1) iscol -> nonscalable iscol_local */
3485     /* Check for special case: each processor gets entire matrix columns */
3486     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3487     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3488     if (allcolumns) {
3489       iscol_sub = iscol_local;
3490       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3491       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3492 
3493     } else {
3494       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted; it can have duplicate indices */
3495       PetscInt *idx,*cmap1,k;
3496       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3497       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3498       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3499       count = 0;
3500       k     = 0;
3501       for (i=0; i<Ncols; i++) {
3502         j = is_idx[i];
3503         if (j >= cstart && j < cend) {
3504           /* diagonal part of mat */
3505           idx[count]     = j;
3506           cmap1[count++] = i; /* column index in submat */
3507         } else if (Bn) {
3508           /* off-diagonal part of mat */
3509           if (j == garray[k]) {
3510             idx[count]     = j;
3511             cmap1[count++] = i;  /* column index in submat */
3512           } else if (j > garray[k]) {
3513             while (j > garray[k] && k < Bn-1) k++;
3514             if (j == garray[k]) {
3515               idx[count]     = j;
3516               cmap1[count++] = i; /* column index in submat */
3517             }
3518           }
3519         }
3520       }
3521       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3522 
3523       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3524       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3525       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3526 
3527       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3528     }
3529 
3530     /* (3) Create sequential Msub */
3531     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3532   }
3533 
3534   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3535   aij  = (Mat_SeqAIJ*)(Msub)->data;
3536   ii   = aij->i;
3537   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3538 
3539   /*
3540       m - number of local rows
3541       Ncols - number of columns (same on all processors)
3542       rstart - first row in new global matrix generated
3543   */
3544   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3545 
3546   if (call == MAT_INITIAL_MATRIX) {
3547     /* (4) Create parallel newmat */
3548     PetscMPIInt    rank,size;
3549     PetscInt       csize;
3550 
3551     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3552     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3553 
3554     /*
3555         Determine the number of non-zeros in the diagonal and off-diagonal
3556         portions of the matrix in order to do correct preallocation
3557     */
3558 
3559     /* first get start and end of "diagonal" columns */
3560     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3561     if (csize == PETSC_DECIDE) {
3562       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3563       if (mglobal == Ncols) { /* square matrix */
3564         nlocal = m;
3565       } else {
3566         nlocal = Ncols/size + ((Ncols % size) > rank);
3567       }
3568     } else {
3569       nlocal = csize;
3570     }
3571     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3572     rstart = rend - nlocal;
3573     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3574 
3575     /* next, compute all the lengths */
3576     jj    = aij->j;
3577     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3578     olens = dlens + m;
3579     for (i=0; i<m; i++) {
3580       jend = ii[i+1] - ii[i];
3581       olen = 0;
3582       dlen = 0;
3583       for (j=0; j<jend; j++) {
3584         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3585         else dlen++;
3586         jj++;
3587       }
3588       olens[i] = olen;
3589       dlens[i] = dlen;
3590     }
3591 
3592     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3593     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3594 
3595     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3596     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3597     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3598     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3599     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3600     ierr = PetscFree(dlens);CHKERRQ(ierr);
3601 
3602   } else { /* call == MAT_REUSE_MATRIX */
3603     M    = *newmat;
3604     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3605     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3606     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3607     /*
3608          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3609        rather than the slower MatSetValues().
3610     */
3611     M->was_assembled = PETSC_TRUE;
3612     M->assembled     = PETSC_FALSE;
3613   }
3614 
3615   /* (5) Set values of Msub to *newmat */
3616   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3617   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3618 
3619   jj   = aij->j;
3620   aa   = aij->a;
3621   for (i=0; i<m; i++) {
3622     row = rstart + i;
3623     nz  = ii[i+1] - ii[i];
3624     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3625     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3626     jj += nz; aa += nz;
3627   }
3628   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3629 
3630   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3631   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3632 
3633   ierr = PetscFree(colsub);CHKERRQ(ierr);
3634 
3635   /* save Msub, iscol_sub and iscmap used in processor for next request */
3636   if (call ==  MAT_INITIAL_MATRIX) {
3637     *newmat = M;
3638     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3639     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3640 
3641     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3642     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3643 
3644     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3645     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3646 
3647     if (iscol_local) {
3648       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3649       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3650     }
3651   }
3652   PetscFunctionReturn(0);
3653 }
3654 
3655 /*
3656     Not great since it makes two copies of the submatrix: first a local SeqAIJ
3657   on each process, and then the end result by concatenating the local matrices.
3658   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3659 
3660   Note: This requires a sequential iscol with all indices.
3661 */
3662 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3663 {
3664   PetscErrorCode ierr;
3665   PetscMPIInt    rank,size;
3666   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3667   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3668   Mat            M,Mreuse;
3669   MatScalar      *aa,*vwork;
3670   MPI_Comm       comm;
3671   Mat_SeqAIJ     *aij;
3672   PetscBool      colflag,allcolumns=PETSC_FALSE;
3673 
3674   PetscFunctionBegin;
3675   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3676   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3677   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3678 
3679   /* Check for special case: each processor gets entire matrix columns */
3680   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3681   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3682   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3683 
3684   if (call ==  MAT_REUSE_MATRIX) {
3685     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3686     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3687     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3688   } else {
3689     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3690   }
3691 
3692   /*
3693       m - number of local rows
3694       n - number of columns (same on all processors)
3695       rstart - first row in new global matrix generated
3696   */
3697   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3698   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3699   if (call == MAT_INITIAL_MATRIX) {
3700     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3701     ii  = aij->i;
3702     jj  = aij->j;
3703 
3704     /*
3705         Determine the number of non-zeros in the diagonal and off-diagonal
3706         portions of the matrix in order to do correct preallocation
3707     */
3708 
3709     /* first get start and end of "diagonal" columns */
3710     if (csize == PETSC_DECIDE) {
3711       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3712       if (mglobal == n) { /* square matrix */
3713         nlocal = m;
3714       } else {
3715         nlocal = n/size + ((n % size) > rank);
3716       }
3717     } else {
3718       nlocal = csize;
3719     }
3720     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3721     rstart = rend - nlocal;
3722     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3723 
3724     /* next, compute all the lengths */
3725     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3726     olens = dlens + m;
3727     for (i=0; i<m; i++) {
3728       jend = ii[i+1] - ii[i];
3729       olen = 0;
3730       dlen = 0;
3731       for (j=0; j<jend; j++) {
3732         if (*jj < rstart || *jj >= rend) olen++;
3733         else dlen++;
3734         jj++;
3735       }
3736       olens[i] = olen;
3737       dlens[i] = dlen;
3738     }
3739     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3740     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3741     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3742     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3743     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3744     ierr = PetscFree(dlens);CHKERRQ(ierr);
3745   } else {
3746     PetscInt ml,nl;
3747 
3748     M    = *newmat;
3749     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3750     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3751     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3752     /*
3753          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3754        rather than the slower MatSetValues().
3755     */
3756     M->was_assembled = PETSC_TRUE;
3757     M->assembled     = PETSC_FALSE;
3758   }
3759   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3760   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3761   ii   = aij->i;
3762   jj   = aij->j;
3763   aa   = aij->a;
3764   for (i=0; i<m; i++) {
3765     row   = rstart + i;
3766     nz    = ii[i+1] - ii[i];
3767     cwork = jj;     jj += nz;
3768     vwork = aa;     aa += nz;
3769     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3770   }
3771 
3772   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3773   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3774   *newmat = M;
3775 
3776   /* save submatrix used in processor for next request */
3777   if (call ==  MAT_INITIAL_MATRIX) {
3778     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3779     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3780   }
3781   PetscFunctionReturn(0);
3782 }
3783 
3784 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3785 {
3786   PetscInt       m,cstart, cend,j,nnz,i,d;
3787   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3788   const PetscInt *JJ;
3789   PetscScalar    *values;
3790   PetscErrorCode ierr;
3791   PetscBool      nooffprocentries;
3792 
3793   PetscFunctionBegin;
3794   if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3795 
3796   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3797   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3798   m      = B->rmap->n;
3799   cstart = B->cmap->rstart;
3800   cend   = B->cmap->rend;
3801   rstart = B->rmap->rstart;
3802 
3803   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3804 
3805 #if defined(PETSC_USE_DEBUG)
3806   for (i=0; i<m; i++) {
3807     nnz = Ii[i+1]- Ii[i];
3808     JJ  = J + Ii[i];
3809     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3810     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3811     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3812   }
3813 #endif
3814 
3815   for (i=0; i<m; i++) {
3816     nnz     = Ii[i+1]- Ii[i];
3817     JJ      = J + Ii[i];
3818     nnz_max = PetscMax(nnz_max,nnz);
3819     d       = 0;
3820     for (j=0; j<nnz; j++) {
3821       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3822     }
3823     d_nnz[i] = d;
3824     o_nnz[i] = nnz - d;
3825   }
3826   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3827   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3828 
3829   if (v) values = (PetscScalar*)v;
3830   else {
3831     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3832   }
3833 
3834   for (i=0; i<m; i++) {
3835     ii   = i + rstart;
3836     nnz  = Ii[i+1]- Ii[i];
3837     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3838   }
3839   nooffprocentries    = B->nooffprocentries;
3840   B->nooffprocentries = PETSC_TRUE;
3841   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3842   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3843   B->nooffprocentries = nooffprocentries;
3844 
3845   if (!v) {
3846     ierr = PetscFree(values);CHKERRQ(ierr);
3847   }
3848   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3849   PetscFunctionReturn(0);
3850 }
3851 
3852 /*@
3853    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3854    (the default parallel PETSc format).
3855 
3856    Collective on MPI_Comm
3857 
3858    Input Parameters:
3859 +  B - the matrix
3860 .  i - the indices into j for the start of each local row (starts with zero)
3861 .  j - the column indices for each local row (starts with zero)
3862 -  v - optional values in the matrix
3863 
3864    Level: developer
3865 
3866    Notes:
3867        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3868      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3869      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3870 
3871        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3872 
3873        The format used for the sparse matrix input is equivalent to a
3874     row-major ordering, i.e., for the following matrix, the input data expected is
3875     as shown:
3876 
3877 $        1 0 0
3878 $        2 0 3     P0
3879 $       -------
3880 $        4 5 6     P1
3881 $
3882 $     Process0 [P0]: rows_owned=[0,1]
3883 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3884 $        j =  {0,0,2}  [size = 3]
3885 $        v =  {1,2,3}  [size = 3]
3886 $
3887 $     Process1 [P1]: rows_owned=[2]
3888 $        i =  {0,3}    [size = nrow+1  = 1+1]
3889 $        j =  {0,1,2}  [size = 3]
3890 $        v =  {4,5,6}  [size = 3]
3891 
3892 .keywords: matrix, aij, compressed row, sparse, parallel
3893 
3894 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3895           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3896 @*/
3897 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3898 {
3899   PetscErrorCode ierr;
3900 
3901   PetscFunctionBegin;
3902   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3903   PetscFunctionReturn(0);
3904 }
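
/*
   A minimal illustrative sketch (not part of the library): how rank 0 of the
   two-process example in the manpage above could pass its local CSR data to
   MatMPIAIJSetPreallocationCSR(). The helper name is ours, and we assume B was
   already created as a MATMPIAIJ with local sizes (2,PETSC_DECIDE) and global
   sizes (3,3).
*/
static PetscErrorCode ExampleSetPreallocationCSR_Rank0(Mat B)
{
  PetscErrorCode    ierr;
  const PetscInt    i[] = {0,1,3};        /* row offsets: local row 0 has 1 entry, row 1 has 2 */
  const PetscInt    j[] = {0,0,2};        /* global column indices, sorted within each row */
  const PetscScalar v[] = {1.0,2.0,3.0};  /* values matching j[] */

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}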
3905 
3906 /*@C
3907    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3908    (the default parallel PETSc format).  For good matrix assembly performance
3909    the user should preallocate the matrix storage by setting the parameters
3910    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3911    performance can be increased by more than a factor of 50.
3912 
3913    Collective on MPI_Comm
3914 
3915    Input Parameters:
3916 +  B - the matrix
3917 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3918            (same value is used for all local rows)
3919 .  d_nnz - array containing the number of nonzeros in the various rows of the
3920            DIAGONAL portion of the local submatrix (possibly different for each row)
3921            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3922            The size of this array is equal to the number of local rows, i.e 'm'.
3923            For matrices that will be factored, you must leave room for (and set)
3924            the diagonal entry even if it is zero.
3925 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3926            submatrix (same value is used for all local rows).
3927 -  o_nnz - array containing the number of nonzeros in the various rows of the
3928            OFF-DIAGONAL portion of the local submatrix (possibly different for
3929            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3930            structure. The size of this array is equal to the number
3931            of local rows, i.e 'm'.
3932 
3933    If the *_nnz parameter is given then the *_nz parameter is ignored
3934 
3935    The AIJ format (also called the Yale sparse matrix format or
3936    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3937    storage.  The stored row and column indices begin with zero.
3938    See Users-Manual: ch_mat for details.
3939 
3940    The parallel matrix is partitioned such that the first m0 rows belong to
3941    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3942    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3943 
3944    The DIAGONAL portion of the local submatrix of a processor can be defined
3945    as the submatrix which is obtained by extracting the part corresponding to
3946    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3947    first row that belongs to the processor, r2 is the last row belonging to
3948    this processor, and c1-c2 is the range of indices of the local part of a
3949    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3950    common case of a square matrix, the row and column ranges are the same and
3951    the DIAGONAL part is also square. The remaining portion of the local
3952    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3953 
3954    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
3955 
3956    You can call MatGetInfo() to get information on how effective the preallocation was;
3957    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded.
3958    You can also run with the option -info and look for messages with the string
3959    malloc in them to see if additional memory allocation was needed.
3960 
3961    Example usage:
3962 
3963    Consider the following 8x8 matrix with 34 nonzero values that is
3964    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3965    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
3966    as follows:
3967 
3968 .vb
3969             1  2  0  |  0  3  0  |  0  4
3970     Proc0   0  5  6  |  7  0  0  |  8  0
3971             9  0 10  | 11  0  0  | 12  0
3972     -------------------------------------
3973            13  0 14  | 15 16 17  |  0  0
3974     Proc1   0 18  0  | 19 20 21  |  0  0
3975             0  0  0  | 22 23  0  | 24  0
3976     -------------------------------------
3977     Proc2  25 26 27  |  0  0 28  | 29  0
3978            30  0  0  | 31 32 33  |  0 34
3979 .ve
3980 
3981    This can be represented as a collection of submatrices as:
3982 
3983 .vb
3984       A B C
3985       D E F
3986       G H I
3987 .ve
3988 
3989    Where the submatrices A,B,C are owned by proc0, D,E,F are
3990    owned by proc1, G,H,I are owned by proc2.
3991 
3992    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3993    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3994    The 'M','N' parameters are 8,8, and have the same values on all procs.
3995 
3996    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3997    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3998    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3999    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4000    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4001    matrix, and [DF] as another SeqAIJ matrix.
4002 
4003    When d_nz, o_nz parameters are specified, d_nz storage elements are
4004    allocated for every row of the local diagonal submatrix, and o_nz
4005    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4006    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4007    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
4008    In this case, the values of d_nz,o_nz are:
4009 .vb
4010      proc0 : dnz = 2, o_nz = 2
4011      proc1 : dnz = 3, o_nz = 2
4012      proc2 : dnz = 1, o_nz = 4
4013 .ve
4014    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4015    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4016    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4017    34 values.
4018 
4019    When d_nnz, o_nnz parameters are specified, the storage is specified
4020    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4021    In the above case the values for d_nnz,o_nnz are:
4022 .vb
4023      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4024      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4025      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4026 .ve
4027    Here the space allocated is the sum of all the above values, i.e., 34, and
4028    hence the preallocation is perfect.
4029 
4030    Level: intermediate
4031 
4032 .keywords: matrix, aij, compressed row, sparse, parallel
4033 
4034 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4035           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4036 @*/
4037 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4038 {
4039   PetscErrorCode ierr;
4040 
4041   PetscFunctionBegin;
4042   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4043   PetscValidType(B,1);
4044   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4045   PetscFunctionReturn(0);
4046 }
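
/*
   A minimal illustrative sketch (not part of the library): proc0 of the 8x8
   example above preallocating with per-row counts. The helper name is ours;
   B is assumed to be a MATMPIAIJ with 3 local rows created elsewhere. Since
   the *_nnz arrays are given, the scalar d_nz/o_nz arguments are ignored and
   may be passed as 0.
*/
static PetscErrorCode ExamplePreallocateRank0(Mat B)
{
  PetscErrorCode ierr;
  const PetscInt d_nnz[] = {2,2,2};  /* nonzeros per local row of the DIAGONAL block [A] */
  const PetscInt o_nnz[] = {2,2,2};  /* nonzeros per local row of the OFF-DIAGONAL block [BC] */

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}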
4047 
4048 /*@
4049      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the
4050          local rows in standard CSR format.
4051 
4052    Collective on MPI_Comm
4053 
4054    Input Parameters:
4055 +  comm - MPI communicator
4056 .  m - number of local rows (Cannot be PETSC_DECIDE)
4057 .  n - This value should be the same as the local size used in creating the
4058        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4059        calculated if N is given). For square matrices n is almost always m.
4060 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4061 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4062 .   i - row indices
4063 .   j - column indices
4064 -   a - matrix values
4065 
4066    Output Parameter:
4067 .   mat - the matrix
4068 
4069    Level: intermediate
4070 
4071    Notes:
4072        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4073      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4074      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4075 
4076        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4077 
4078        The format used for the sparse matrix input is equivalent to a
4079     row-major ordering, i.e., for the following matrix, the input data expected is
4080     as shown:
4081 
4082 $        1 0 0
4083 $        2 0 3     P0
4084 $       -------
4085 $        4 5 6     P1
4086 $
4087 $     Process0 [P0]: rows_owned=[0,1]
4088 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4089 $        j =  {0,0,2}  [size = 3]
4090 $        v =  {1,2,3}  [size = 3]
4091 $
4092 $     Process1 [P1]: rows_owned=[2]
4093 $        i =  {0,3}    [size = nrow+1  = 1+1]
4094 $        j =  {0,1,2}  [size = 3]
4095 $        v =  {4,5,6}  [size = 3]
4096 
4097 .keywords: matrix, aij, compressed row, sparse, parallel
4098 
4099 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4100           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4101 @*/
4102 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4103 {
4104   PetscErrorCode ierr;
4105 
4106   PetscFunctionBegin;
4107   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4108   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4109   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4110   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4111   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4112   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4113   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4114   PetscFunctionReturn(0);
4115 }
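
/*
   A minimal illustrative sketch (not part of the library): building the 3x3
   two-process example from the manpage above. The helper name is ours, and the
   communicator is assumed to have exactly two ranks.
*/
static PetscErrorCode ExampleCreateMPIAIJWithArrays(MPI_Comm comm,Mat *mat)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  const PetscInt    i0[] = {0,1,3},j0[] = {0,0,2};  /* P0: rows 0 and 1 */
  const PetscScalar a0[] = {1.0,2.0,3.0};
  const PetscInt    i1[] = {0,3},j1[] = {0,1,2};    /* P1: row 2 */
  const PetscScalar a1[] = {4.0,5.0,6.0};

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i0,j0,a0,mat);CHKERRQ(ierr);
  } else {
    ierr = MatCreateMPIAIJWithArrays(comm,1,PETSC_DECIDE,3,3,i1,j1,a1,mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}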
4116 
4117 /*@C
4118    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4119    (the default parallel PETSc format).  For good matrix assembly performance
4120    the user should preallocate the matrix storage by setting the parameters
4121    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4122    performance can be increased by more than a factor of 50.
4123 
4124    Collective on MPI_Comm
4125 
4126    Input Parameters:
4127 +  comm - MPI communicator
4128 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
4129            This value should be the same as the local size used in creating the
4130            y vector for the matrix-vector product y = Ax.
4131 .  n - This value should be the same as the local size used in creating the
4132        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4133        calculated if N is given). For square matrices n is almost always m.
4134 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4135 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4136 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4137            (same value is used for all local rows)
4138 .  d_nnz - array containing the number of nonzeros in the various rows of the
4139            DIAGONAL portion of the local submatrix (possibly different for each row)
4140            or NULL, if d_nz is used to specify the nonzero structure.
4141            The size of this array is equal to the number of local rows, i.e 'm'.
4142 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4143            submatrix (same value is used for all local rows).
4144 -  o_nnz - array containing the number of nonzeros in the various rows of the
4145            OFF-DIAGONAL portion of the local submatrix (possibly different for
4146            each row) or NULL, if o_nz is used to specify the nonzero
4147            structure. The size of this array is equal to the number
4148            of local rows, i.e 'm'.
4149 
4150    Output Parameter:
4151 .  A - the matrix
4152 
4153    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4154    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4155    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4156 
4157    Notes:
4158    If the *_nnz parameter is given then the *_nz parameter is ignored
4159 
4160    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4161    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4162    storage requirements for this matrix.
4163 
4164    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4165    processor then it must be used on all processors that share the object for
4166    that argument.
4167 
4168    The user MUST specify either the local or global matrix dimensions
4169    (possibly both).
4170 
4171    The parallel matrix is partitioned across processors such that the
4172    first m0 rows belong to process 0, the next m1 rows belong to
4173    process 1, the next m2 rows belong to process 2, etc., where
4174    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4175    values corresponding to an [m x N] submatrix.
4176 
4177    The columns are logically partitioned with the n0 columns belonging
4178    to the 0th partition, the next n1 columns belonging to the next
4179    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4180 
4181    The DIAGONAL portion of the local submatrix on any given processor
4182    is the submatrix formed by the m rows and n columns owned by
4183    that processor, i.e., the diagonal matrix on
4184    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4185    etc. The remaining portion of the local submatrix [m x (N-n)]
4186    constitutes the OFF-DIAGONAL portion. The example below
4187    illustrates this concept.
4188 
4189    For a square global matrix we define each processor's diagonal portion
4190    to be its local rows and the corresponding columns (a square submatrix);
4191    each processor's off-diagonal portion encompasses the remainder of the
4192    local matrix (a rectangular submatrix).
4193 
4194    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4195 
4196    When calling this routine with a single process communicator, a matrix of
4197    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4198    type of communicator, use the construction mechanism
4199 .vb
4200      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4201 .ve
4202 
4207 
4208    By default, this format uses inodes (identical nodes) when possible.
4209    We search for consecutive rows with the same nonzero structure, thereby
4210    reusing matrix information to achieve increased efficiency.
4211 
4212    Options Database Keys:
4213 +  -mat_no_inode  - Do not use inodes
4214 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4215 
4218    Example usage:
4219 
4220    Consider the following 8x8 matrix with 34 nonzero values that is
4221    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4222    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4223    as follows:
4224 
4225 .vb
4226             1  2  0  |  0  3  0  |  0  4
4227     Proc0   0  5  6  |  7  0  0  |  8  0
4228             9  0 10  | 11  0  0  | 12  0
4229     -------------------------------------
4230            13  0 14  | 15 16 17  |  0  0
4231     Proc1   0 18  0  | 19 20 21  |  0  0
4232             0  0  0  | 22 23  0  | 24  0
4233     -------------------------------------
4234     Proc2  25 26 27  |  0  0 28  | 29  0
4235            30  0  0  | 31 32 33  |  0 34
4236 .ve
4237 
4238    This can be represented as a collection of submatrices as:
4239 
4240 .vb
4241       A B C
4242       D E F
4243       G H I
4244 .ve
4245 
4246    Where the submatrices A,B,C are owned by proc0, D,E,F are
4247    owned by proc1, G,H,I are owned by proc2.
4248 
4249    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4250    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4251    The 'M','N' parameters are 8,8, and have the same values on all procs.
4252 
4253    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4254    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4255    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4256    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4257    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4258    matrix, and [DF] as another SeqAIJ matrix.
4259 
4260    When d_nz, o_nz parameters are specified, d_nz storage elements are
4261    allocated for every row of the local diagonal submatrix, and o_nz
4262    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4263    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4264    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
4265    In this case, the values of d_nz,o_nz are:
4266 .vb
4267      proc0 : dnz = 2, o_nz = 2
4268      proc1 : dnz = 3, o_nz = 2
4269      proc2 : dnz = 1, o_nz = 4
4270 .ve
4271    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4272    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4273    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4274    34 values.
4275 
4276    When d_nnz, o_nnz parameters are specified, the storage is specified
4277    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4278    In the above case the values for d_nnz,o_nnz are:
4279 .vb
4280      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4281      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4282      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4283 .ve
4284    Here the space allocated is the sum of all the above values, i.e., 34, and
4285    hence the preallocation is perfect.
4286 
4287    Level: intermediate
4288 
4289 .keywords: matrix, aij, compressed row, sparse, parallel
4290 
4291 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4292           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4293 @*/
4294 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4295 {
4296   PetscErrorCode ierr;
4297   PetscMPIInt    size;
4298 
4299   PetscFunctionBegin;
4300   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4301   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4302   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4303   if (size > 1) {
4304     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4305     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4306   } else {
4307     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4308     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4309   }
4310   PetscFunctionReturn(0);
4311 }
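
/*
   A minimal illustrative sketch (not part of the library) of the recommended
   MatCreate()/MatSetFromOptions()/MatXXXXSetPreallocation() paradigm mentioned
   above. Both preallocation calls are safe: the one that does not match the
   actual type is a no-op. The helper name and the sample counts (5 diagonal
   and 2 off-diagonal nonzeros per row) are ours.
*/
static PetscErrorCode ExampleCreateAIJParadigm(MPI_Comm comm,PetscInt m,PetscInt n,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetFromOptions(*A);CHKERRQ(ierr);                       /* -mat_type aij picks SEQAIJ or MPIAIJ */
  ierr = MatSeqAIJSetPreallocation(*A,5,NULL);CHKERRQ(ierr);        /* used on a single process */
  ierr = MatMPIAIJSetPreallocation(*A,5,NULL,2,NULL);CHKERRQ(ierr); /* used on multiple processes */
  PetscFunctionReturn(0);
}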
4312 
4313 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4314 {
4315   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4316   PetscBool      flg;
4317   PetscErrorCode ierr;
4318 
4319   PetscFunctionBegin;
4320   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
4321   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4322   if (Ad)     *Ad     = a->A;
4323   if (Ao)     *Ao     = a->B;
4324   if (colmap) *colmap = a->garray;
4325   PetscFunctionReturn(0);
4326 }
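
/*
   A minimal illustrative sketch (not part of the library): inspecting the two
   SeqAIJ blocks of a MATMPIAIJ matrix. colmap[k] is the global column index of
   local column k of the off-diagonal block. The helper name is ours.
*/
static PetscErrorCode ExampleInspectBlocks(Mat A)
{
  PetscErrorCode ierr;
  Mat            Ad,Ao;
  const PetscInt *colmap;
  PetscInt       nd,no;

  PetscFunctionBegin;
  ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
  ierr = MatGetSize(Ad,NULL,&nd);CHKERRQ(ierr);  /* columns of the diagonal block */
  ierr = MatGetSize(Ao,NULL,&no);CHKERRQ(ierr);  /* number of nonzero off-process columns */
  ierr = PetscPrintf(PETSC_COMM_SELF,"diagonal block: %D cols, off-diagonal block: %D cols\n",nd,no);CHKERRQ(ierr);
  if (no) {
    ierr = PetscPrintf(PETSC_COMM_SELF,"first off-process column is %D\n",colmap[0]);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}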
4327 
4328 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4329 {
4330   PetscErrorCode ierr;
4331   PetscInt       m,N,i,rstart,nnz,Ii;
4332   PetscInt       *indx;
4333   PetscScalar    *values;
4334 
4335   PetscFunctionBegin;
4336   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4337   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4338     PetscInt       *dnz,*onz,sum,bs,cbs;
4339 
4340     if (n == PETSC_DECIDE) {
4341       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4342     }
4343     /* Check sum(n) = N */
4344     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4345     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4346 
4347     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4348     rstart -= m;
4349 
4350     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4351     for (i=0; i<m; i++) {
4352       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4353       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4354       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4355     }
4356 
4357     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4358     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4359     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4360     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4361     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4362     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4363     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4364     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4365   }
4366 
4367   /* numeric phase */
4368   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4369   for (i=0; i<m; i++) {
4370     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4371     Ii   = i + rstart;
4372     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4373     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4374   }
4375   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4376   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4377   PetscFunctionReturn(0);
4378 }
4379 
4380 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4381 {
4382   PetscErrorCode    ierr;
4383   PetscMPIInt       rank;
4384   PetscInt          m,N,i,rstart,nnz;
4385   size_t            len;
4386   const PetscInt    *indx;
4387   PetscViewer       out;
4388   char              *name;
4389   Mat               B;
4390   const PetscScalar *values;
4391 
4392   PetscFunctionBegin;
4393   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4394   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4395   /* Should this be the type of the diagonal block of A? */
4396   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4397   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4398   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4399   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4400   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4401   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4402   for (i=0; i<m; i++) {
4403     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4404     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4405     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4406   }
4407   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4408   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4409 
4410   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4411   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4412   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4413   sprintf(name,"%s.%d",outfile,rank);
4414   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4415   ierr = PetscFree(name);CHKERRQ(ierr);
4416   ierr = MatView(B,out);CHKERRQ(ierr);
4417   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4418   ierr = MatDestroy(&B);CHKERRQ(ierr);
4419   PetscFunctionReturn(0);
4420 }
4421 
4422 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4423 {
4424   PetscErrorCode      ierr;
4425   Mat_Merge_SeqsToMPI *merge;
4426   PetscContainer      container;
4427 
4428   PetscFunctionBegin;
4429   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4430   if (container) {
4431     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4432     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4433     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4434     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4435     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4436     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4437     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4438     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4439     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4440     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4441     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4442     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4443     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4444     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4445     ierr = PetscFree(merge);CHKERRQ(ierr);
4446     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4447   }
4448   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4449   PetscFunctionReturn(0);
4450 }
4451 
4452 #include <../src/mat/utils/freespace.h>
4453 #include <petscbt.h>
4454 
4455 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4456 {
4457   PetscErrorCode      ierr;
4458   MPI_Comm            comm;
4459   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4460   PetscMPIInt         size,rank,taga,*len_s;
4461   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4462   PetscInt            proc,m;
4463   PetscInt            **buf_ri,**buf_rj;
4464   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4465   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4466   MPI_Request         *s_waits,*r_waits;
4467   MPI_Status          *status;
4468   MatScalar           *aa=a->a;
4469   MatScalar           **abuf_r,*ba_i;
4470   Mat_Merge_SeqsToMPI *merge;
4471   PetscContainer      container;
4472 
4473   PetscFunctionBegin;
4474   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4475   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4476 
4477   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4478   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4479 
4480   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4481   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4482 
4483   bi     = merge->bi;
4484   bj     = merge->bj;
4485   buf_ri = merge->buf_ri;
4486   buf_rj = merge->buf_rj;
4487 
4488   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4489   owners = merge->rowmap->range;
4490   len_s  = merge->len_s;
4491 
4492   /* send and recv matrix values */
4493   /*-----------------------------*/
4494   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4495   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4496 
4497   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4498   for (proc=0,k=0; proc<size; proc++) {
4499     if (!len_s[proc]) continue;
4500     i    = owners[proc];
4501     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4502     k++;
4503   }
4504 
4505   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4506   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4507   ierr = PetscFree(status);CHKERRQ(ierr);
4508 
4509   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4510   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4511 
4512   /* insert mat values of mpimat */
4513   /*----------------------------*/
4514   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4515   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4516 
4517   for (k=0; k<merge->nrecv; k++) {
4518     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4519     nrows       = *(buf_ri_k[k]);
4520     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4521     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4522   }
4523 
4524   /* set values of ba */
4525   m = merge->rowmap->n;
4526   for (i=0; i<m; i++) {
4527     arow = owners[rank] + i;
4528     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4529     bnzi = bi[i+1] - bi[i];
4530     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4531 
4532     /* add local non-zero vals of this proc's seqmat into ba */
4533     anzi   = ai[arow+1] - ai[arow];
4534     aj     = a->j + ai[arow];
4535     aa     = a->a + ai[arow];
4536     nextaj = 0;
4537     for (j=0; nextaj<anzi; j++) {
4538       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4539         ba_i[j] += aa[nextaj++];
4540       }
4541     }
4542 
4543     /* add received vals into ba */
4544     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4545       /* i-th row */
4546       if (i == *nextrow[k]) {
4547         anzi   = *(nextai[k]+1) - *nextai[k];
4548         aj     = buf_rj[k] + *(nextai[k]);
4549         aa     = abuf_r[k] + *(nextai[k]);
4550         nextaj = 0;
4551         for (j=0; nextaj<anzi; j++) {
4552           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4553             ba_i[j] += aa[nextaj++];
4554           }
4555         }
4556         nextrow[k]++; nextai[k]++;
4557       }
4558     }
4559     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4560   }
4561   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4562   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4563 
4564   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4565   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4566   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4567   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4568   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4569   PetscFunctionReturn(0);
4570 }
4571 
4572 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4573 {
4574   PetscErrorCode      ierr;
4575   Mat                 B_mpi;
4576   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4577   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4578   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4579   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4580   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4581   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4582   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4583   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4584   MPI_Status          *status;
4585   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4586   PetscBT             lnkbt;
4587   Mat_Merge_SeqsToMPI *merge;
4588   PetscContainer      container;
4589 
4590   PetscFunctionBegin;
4591   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4592 
4593   /* make sure it is a PETSc comm */
4594   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4595   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4596   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4597 
4598   ierr = PetscNew(&merge);CHKERRQ(ierr);
4599   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4600 
4601   /* determine row ownership */
4602   /*---------------------------------------------------------*/
4603   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4604   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4605   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4606   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4607   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4608   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4609   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4610 
4611   m      = merge->rowmap->n;
4612   owners = merge->rowmap->range;
4613 
4614   /* determine the number of messages to send, their lengths */
4615   /*---------------------------------------------------------*/
4616   len_s = merge->len_s;
4617 
4618   len          = 0; /* length of buf_si[] */
4619   merge->nsend = 0;
4620   for (proc=0; proc<size; proc++) {
4621     len_si[proc] = 0;
4622     if (proc == rank) {
4623       len_s[proc] = 0;
4624     } else {
4625       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4626       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
4627     }
4628     if (len_s[proc]) {
4629       merge->nsend++;
4630       nrows = 0;
4631       for (i=owners[proc]; i<owners[proc+1]; i++) {
4632         if (ai[i+1] > ai[i]) nrows++;
4633       }
4634       len_si[proc] = 2*(nrows+1);
4635       len         += len_si[proc];
4636     }
4637   }
4638 
4639   /* determine the number and length of messages to receive for ij-structure */
4640   /*-------------------------------------------------------------------------*/
4641   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4642   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4643 
4644   /* post the Irecv of j-structure */
4645   /*-------------------------------*/
4646   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4647   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4648 
4649   /* post the Isend of j-structure */
4650   /*--------------------------------*/
4651   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4652 
4653   for (proc=0, k=0; proc<size; proc++) {
4654     if (!len_s[proc]) continue;
4655     i    = owners[proc];
4656     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4657     k++;
4658   }
4659 
4660   /* receives and sends of j-structure are complete */
4661   /*------------------------------------------------*/
4662   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4663   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4664 
4665   /* send and recv i-structure */
4666   /*---------------------------*/
4667   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4668   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4669 
4670   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4671   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4672   for (proc=0,k=0; proc<size; proc++) {
4673     if (!len_s[proc]) continue;
4674     /* form outgoing message for i-structure:
4675          buf_si[0]:                 nrows to be sent
4676                [1:nrows]:           row index (global)
4677                [nrows+1:2*nrows+1]: i-structure index
4678     */
4679     /*-------------------------------------------*/
4680     nrows       = len_si[proc]/2 - 1;
4681     buf_si_i    = buf_si + nrows+1;
4682     buf_si[0]   = nrows;
4683     buf_si_i[0] = 0;
4684     nrows       = 0;
4685     for (i=owners[proc]; i<owners[proc+1]; i++) {
4686       anzi = ai[i+1] - ai[i];
4687       if (anzi) {
4688         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4689         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4690         nrows++;
4691       }
4692     }
4693     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4694     k++;
4695     buf_si += len_si[proc];
4696   }
4697 
4698   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4699   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4700 
4701   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4702   for (i=0; i<merge->nrecv; i++) {
4703     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4704   }
4705 
4706   ierr = PetscFree(len_si);CHKERRQ(ierr);
4707   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4708   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4709   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4710   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4711   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4712   ierr = PetscFree(status);CHKERRQ(ierr);
4713 
4714   /* compute a local seq matrix in each processor */
4715   /*----------------------------------------------*/
4716   /* allocate bi array and free space for accumulating nonzero column info */
4717   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4718   bi[0] = 0;
4719 
4720   /* create and initialize a linked list */
4721   nlnk = N+1;
4722   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4723 
4724   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4725   len  = ai[owners[rank+1]] - ai[owners[rank]];
4726   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4727 
4728   current_space = free_space;
4729 
4730   /* determine symbolic info for each local row */
4731   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4732 
4733   for (k=0; k<merge->nrecv; k++) {
4734     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4735     nrows       = *buf_ri_k[k];
4736     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4737     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4738   }
4739 
4740   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4741   len  = 0;
4742   for (i=0; i<m; i++) {
4743     bnzi = 0;
4744     /* add local non-zero cols of this proc's seqmat into lnk */
4745     arow  = owners[rank] + i;
4746     anzi  = ai[arow+1] - ai[arow];
4747     aj    = a->j + ai[arow];
4748     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4749     bnzi += nlnk;
4750     /* add received col data into lnk */
4751     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4752       if (i == *nextrow[k]) { /* i-th row */
4753         anzi  = *(nextai[k]+1) - *nextai[k];
4754         aj    = buf_rj[k] + *nextai[k];
4755         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4756         bnzi += nlnk;
4757         nextrow[k]++; nextai[k]++;
4758       }
4759     }
4760     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4761 
4762     /* if free space is not available, make more free space */
4763     if (current_space->local_remaining<bnzi) {
4764       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4765       nspacedouble++;
4766     }
4767     /* copy data into free space, then initialize lnk */
4768     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4769     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4770 
4771     current_space->array           += bnzi;
4772     current_space->local_used      += bnzi;
4773     current_space->local_remaining -= bnzi;
4774 
4775     bi[i+1] = bi[i] + bnzi;
4776   }
4777 
4778   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4779 
4780   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4781   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4782   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4783 
4784   /* create symbolic parallel matrix B_mpi */
4785   /*---------------------------------------*/
4786   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4787   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4788   if (n==PETSC_DECIDE) {
4789     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4790   } else {
4791     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4792   }
4793   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4794   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4795   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4796   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4797   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4798 
4799   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4800   B_mpi->assembled    = PETSC_FALSE;
4801   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4802   merge->bi           = bi;
4803   merge->bj           = bj;
4804   merge->buf_ri       = buf_ri;
4805   merge->buf_rj       = buf_rj;
4806   merge->coi          = NULL;
4807   merge->coj          = NULL;
4808   merge->owners_co    = NULL;
4809 
4810   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4811 
4812   /* attach the supporting struct to B_mpi for reuse */
4813   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4814   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4815   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4816   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4817   *mpimat = B_mpi;
4818 
4819   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4820   PetscFunctionReturn(0);
4821 }
4822 
4823 /*@C
4824       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4825                  matrices from each processor
4826 
4827     Collective on MPI_Comm
4828 
4829    Input Parameters:
4830 +    comm - the communicator the parallel matrix will live on
4831 .    seqmat - the input sequential matrix (one per process)
4832 .    m - number of local rows (or PETSC_DECIDE)
4833 .    n - number of local columns (or PETSC_DECIDE)
4834 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4835 
4836    Output Parameter:
4837 .    mpimat - the parallel matrix generated
4838 
4839     Level: advanced
4840 
4841    Notes:
4842      The dimensions of the sequential matrix in each processor MUST be the same.
4843      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4844      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4845 @*/
4846 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4847 {
4848   PetscErrorCode ierr;
4849   PetscMPIInt    size;
4850 
4851   PetscFunctionBegin;
4852   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4853   if (size == 1) {
4854     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4855     if (scall == MAT_INITIAL_MATRIX) {
4856       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4857     } else {
4858       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4859     }
4860     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4861     PetscFunctionReturn(0);
4862   }
4863   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4864   if (scall == MAT_INITIAL_MATRIX) {
4865     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4866   }
4867   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4868   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4869   PetscFunctionReturn(0);
4870 }
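
/*
   A minimal illustrative sketch (not part of the library): each process
   contributes one M x N SeqAIJ matrix; the first call builds the symbolic and
   numeric parallel sum, the second reuses the symbolic data after only the
   values of seqmat have changed. The helper name is ours.
*/
static PetscErrorCode ExampleSumSeqAIJ(MPI_Comm comm,Mat seqmat,Mat *mpimat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,mpimat);CHKERRQ(ierr);
  /* ... update the values (same nonzero pattern) in seqmat ... */
  ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,mpimat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}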
4871 
4872 /*@
4873      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4874           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4875           with MatGetSize().
4876 
4877     Not Collective
4878 
4879    Input Parameters:
4880 +    A - the matrix
4881 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4882 
4883    Output Parameter:
4884 .    A_loc - the local sequential matrix generated
4885 
4886     Level: developer
4887 
4888 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4889 
4890 @*/
4891 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4892 {
4893   PetscErrorCode ierr;
4894   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4895   Mat_SeqAIJ     *mat,*a,*b;
4896   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4897   MatScalar      *aa,*ba,*cam;
4898   PetscScalar    *ca;
4899   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4900   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4901   PetscBool      match;
4902   MPI_Comm       comm;
4903   PetscMPIInt    size;
4904 
4905   PetscFunctionBegin;
4906   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4907   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4908   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4909   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4910   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4911 
4912   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4913   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4914   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4915   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4916   aa = a->a; ba = b->a;
4917   if (scall == MAT_INITIAL_MATRIX) {
4918     if (size == 1) {
4919       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4920       PetscFunctionReturn(0);
4921     }
4922 
4923     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4924     ci[0] = 0;
4925     for (i=0; i<am; i++) {
4926       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4927     }
4928     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4929     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4930     k    = 0;
4931     for (i=0; i<am; i++) {
4932       ncols_o = bi[i+1] - bi[i];
4933       ncols_d = ai[i+1] - ai[i];
4934       /* off-diagonal portion of A */
4935       for (jo=0; jo<ncols_o; jo++) {
4936         col = cmap[*bj];
4937         if (col >= cstart) break;
4938         cj[k]   = col; bj++;
4939         ca[k++] = *ba++;
4940       }
4941       /* diagonal portion of A */
4942       for (j=0; j<ncols_d; j++) {
4943         cj[k]   = cstart + *aj++;
4944         ca[k++] = *aa++;
4945       }
4946       /* off-diagonal portion of A */
4947       for (j=jo; j<ncols_o; j++) {
4948         cj[k]   = cmap[*bj++];
4949         ca[k++] = *ba++;
4950       }
4951     }
4952     /* put together the new matrix */
4953     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4954     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4955     /* Since these are PETSc arrays, change flags to free them as necessary. */
4956     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4957     mat->free_a  = PETSC_TRUE;
4958     mat->free_ij = PETSC_TRUE;
4959     mat->nonew   = 0;
4960   } else if (scall == MAT_REUSE_MATRIX) {
4961     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4962     ci = mat->i; cj = mat->j; cam = mat->a;
4963     for (i=0; i<am; i++) {
4964       /* off-diagonal portion of A */
4965       ncols_o = bi[i+1] - bi[i];
4966       for (jo=0; jo<ncols_o; jo++) {
4967         col = cmap[*bj];
4968         if (col >= cstart) break;
4969         *cam++ = *ba++; bj++;
4970       }
4971       /* diagonal portion of A */
4972       ncols_d = ai[i+1] - ai[i];
4973       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4974       /* off-diagonal portion of A */
4975       for (j=jo; j<ncols_o; j++) {
4976         *cam++ = *ba++; bj++;
4977       }
4978     }
4979   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4980   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4981   PetscFunctionReturn(0);
4982 }
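
/*
   A minimal illustrative sketch (not part of the library): obtain the local
   mlocal x N rows of a MATMPIAIJ matrix, refresh the copy after the values of
   A change (same nonzero pattern), then free it. The helper name is ours.
*/
static PetscErrorCode ExampleGetLocalMat(Mat A)
{
  PetscErrorCode ierr;
  Mat            Aloc;

  PetscFunctionBegin;
  ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
  /* ... use Aloc; later, after the values of A change ... */
  ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);CHKERRQ(ierr);
  ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}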
4983 
4984 /*@C
4985      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
4986 
4987     Not Collective
4988 
4989    Input Parameters:
4990 +    A - the matrix
4991 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4992 -    row, col - index sets of rows and columns to extract (or NULL)
4993 
4994    Output Parameter:
4995 .    A_loc - the local sequential matrix generated
4996 
4997     Level: developer
4998 
4999 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5000 
5001 @*/
5002 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5003 {
5004   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5005   PetscErrorCode ierr;
5006   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5007   IS             isrowa,iscola;
5008   Mat            *aloc;
5009   PetscBool      match;
5010 
5011   PetscFunctionBegin;
5012   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5013   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5014   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5015   if (!row) {
5016     start = A->rmap->rstart; end = A->rmap->rend;
5017     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5018   } else {
5019     isrowa = *row;
5020   }
5021   if (!col) {
5022     start = A->cmap->rstart;
5023     cmap  = a->garray;
5024     nzA   = a->A->cmap->n;
5025     nzB   = a->B->cmap->n;
5026     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5027     ncols = 0;
5028     for (i=0; i<nzB; i++) {
5029       if (cmap[i] < start) idx[ncols++] = cmap[i];
5030       else break;
5031     }
5032     imark = i;
5033     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5034     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5035     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5036   } else {
5037     iscola = *col;
5038   }
5039   if (scall != MAT_INITIAL_MATRIX) {
5040     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5041     aloc[0] = *A_loc;
5042   }
5043   ierr   = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5044   *A_loc = aloc[0];
5045   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5046   if (!row) {
5047     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5048   }
5049   if (!col) {
5050     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5051   }
5052   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5053   PetscFunctionReturn(0);
5054 }
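
/*
   A minimal illustrative sketch (not part of the library): passing NULL for
   row and col extracts all local rows and only the columns that contain
   nonzeros, as described above. The helper name is ours.
*/
static PetscErrorCode ExampleGetLocalMatCondensed(Mat A)
{
  PetscErrorCode ierr;
  Mat            Aloc;

  PetscFunctionBegin;
  ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&Aloc);CHKERRQ(ierr);
  /* ... use the condensed local matrix ... */
  ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}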
5055 
5056 /*@C
5057     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5058 
5059     Collective on Mat
5060 
5061    Input Parameters:
5062 +    A,B - the matrices in mpiaij format
5063 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5064 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5065 
5066    Output Parameter:
5067 +    rowb, colb - index sets of rows and columns of B to extract
5068 -    B_seq - the sequential matrix generated
5069 
5070     Level: developer
5071 
5072 @*/
5073 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5074 {
5075   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5076   PetscErrorCode ierr;
5077   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5078   IS             isrowb,iscolb;
5079   Mat            *bseq=NULL;
5080 
5081   PetscFunctionBegin;
5082   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5083     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5084   }
5085   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5086 
5087   if (scall == MAT_INITIAL_MATRIX) {
5088     start = A->cmap->rstart;
5089     cmap  = a->garray;
5090     nzA   = a->A->cmap->n;
5091     nzB   = a->B->cmap->n;
5092     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5093     ncols = 0;
5094     for (i=0; i<nzB; i++) {  /* row < local row index */
5095       if (cmap[i] < start) idx[ncols++] = cmap[i];
5096       else break;
5097     }
5098     imark = i;
5099     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5100     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5101     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5102     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5103   } else {
5104     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5105     isrowb  = *rowb; iscolb = *colb;
5106     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5107     bseq[0] = *B_seq;
5108   }
5109   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5110   *B_seq = bseq[0];
5111   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5112   if (!rowb) {
5113     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5114   } else {
5115     *rowb = isrowb;
5116   }
5117   if (!colb) {
5118     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5119   } else {
5120     *colb = iscolb;
5121   }
5122   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5123   PetscFunctionReturn(0);
5124 }
5125 
5126 /*
5127     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5128     of the OFF-DIAGONAL portion of local A
5129 
5130     Collective on Mat
5131 
5132    Input Parameters:
5133 +    A,B - the matrices in mpiaij format
5134 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5135 
5136    Output Parameter:
5137 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5138 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5139 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5140 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5141 
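   Example usage (a hedged sketch for developers, not taken from a PETSc example: with MAT_INITIAL_MATRIX
   the routine hands back its communication buffers, which the caller passes back unchanged for
   MAT_REUSE_MATRIX and eventually frees, PetscFree2() for the startsj pair and PetscFree() for bufa,
   matching how they are allocated below):

      PetscInt  *startsj_s = NULL,*startsj_r = NULL;
      MatScalar *bufa = NULL;
      Mat       B_oth = NULL;
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
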
5142     Level: developer
5143 
5144 */
5145 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5146 {
5147   VecScatter_MPI_General *gen_to,*gen_from;
5148   PetscErrorCode         ierr;
5149   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5150   Mat_SeqAIJ             *b_oth;
5151   VecScatter             ctx;
5152   MPI_Comm               comm;
5153   PetscMPIInt            *rprocs,*sprocs,tag,rank;
5154   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5155   PetscInt               *rvalues,*svalues,*cols,sbs,rbs;
5156   PetscScalar            *b_otha,*bufa,*bufA,*vals;
5157   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5158   MPI_Request            *rwaits = NULL,*swaits = NULL;
5159   MPI_Status             *sstatus,rstatus;
5160   PetscMPIInt            jj,size;
5161   VecScatterType         type;
5162   PetscBool              mpi1;
5163 
5164   PetscFunctionBegin;
5165   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5166   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5167 
5168   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5169     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5170   }
5171   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5172   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5173 
5174   if (size == 1) {
5175     if (startsj_s) *startsj_s = NULL; if (startsj_r) *startsj_r = NULL;
5176     if (bufa_ptr)  *bufa_ptr  = NULL;
5177     *B_oth = NULL;
5178     PetscFunctionReturn(0);
5179   }
5180 
5181   ctx = a->Mvctx;
5182   ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr);
5183   ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr);
5184   if (!mpi1) {
5185     /* a->Mvctx is not of type MPI1, the only type implemented for these Mat-Mat ops,
5186        so create a->Mvctx_mpi1 instead */
5187     if (!a->Mvctx_mpi1) {
5188       a->Mvctx_mpi1_flg = PETSC_TRUE;
5189       ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr);
5190     }
5191     ctx = a->Mvctx_mpi1;
5192   }
5193   tag = ((PetscObject)ctx)->tag;
5194 
5195   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5196   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5197   nrecvs   = gen_from->n;
5198   nsends   = gen_to->n;
5199 
5200   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5201   srow    = gen_to->indices;    /* local row index to be sent */
5202   sstarts = gen_to->starts;
5203   sprocs  = gen_to->procs;
5204   sstatus = gen_to->sstatus;
5205   sbs     = gen_to->bs;
5206   rstarts = gen_from->starts;
5207   rprocs  = gen_from->procs;
5208   rbs     = gen_from->bs;
5209 
5210   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5211   if (scall == MAT_INITIAL_MATRIX) {
5212     /* i-array */
5213     /*---------*/
5214     /*  post receives */
5215     ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
5216     for (i=0; i<nrecvs; i++) {
5217       rowlen = rvalues + rstarts[i]*rbs;
5218       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5219       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5220     }
5221 
5222     /* pack the outgoing message */
5223     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5224 
5225     sstartsj[0] = 0;
5226     rstartsj[0] = 0;
5227     len         = 0; /* total length of j or a array to be sent */
5228     k           = 0;
5229     ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
5230     for (i=0; i<nsends; i++) {
5231       rowlen = svalues + sstarts[i]*sbs;
5232       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5233       for (j=0; j<nrows; j++) {
5234         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5235         for (l=0; l<sbs; l++) {
5236           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5237 
5238           rowlen[j*sbs+l] = ncols;
5239 
5240           len += ncols;
5241           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5242         }
5243         k++;
5244       }
5245       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5246 
5247       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5248     }
5249     /* recvs and sends of i-array are completed */
5250     i = nrecvs;
5251     while (i--) {
5252       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5253     }
5254     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5255     ierr = PetscFree(svalues);CHKERRQ(ierr);
5256 
5257     /* allocate buffers for sending j and a arrays */
5258     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5259     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5260 
5261     /* create i-array of B_oth */
5262     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5263 
5264     b_othi[0] = 0;
5265     len       = 0; /* total length of j or a array to be received */
5266     k         = 0;
5267     for (i=0; i<nrecvs; i++) {
5268       rowlen = rvalues + rstarts[i]*rbs;
5269       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5270       for (j=0; j<nrows; j++) {
5271         b_othi[k+1] = b_othi[k] + rowlen[j];
5272         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5273         k++;
5274       }
5275       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5276     }
5277     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5278 
5279     /* allocate space for j and a arrays of B_oth */
5280     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5281     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5282 
5283     /* j-array */
5284     /*---------*/
5285     /*  post receives of j-array */
5286     for (i=0; i<nrecvs; i++) {
5287       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5288       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5289     }
5290 
5291     /* pack the outgoing message j-array */
5292     k = 0;
5293     for (i=0; i<nsends; i++) {
5294       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5295       bufJ  = bufj+sstartsj[i];
5296       for (j=0; j<nrows; j++) {
5297         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5298         for (ll=0; ll<sbs; ll++) {
5299           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5300           for (l=0; l<ncols; l++) {
5301             *bufJ++ = cols[l];
5302           }
5303           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5304         }
5305       }
5306       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5307     }
5308 
5309     /* recvs and sends of j-array are completed */
5310     i = nrecvs;
5311     while (i--) {
5312       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5313     }
5314     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5315   } else if (scall == MAT_REUSE_MATRIX) {
5316     sstartsj = *startsj_s;
5317     rstartsj = *startsj_r;
5318     bufa     = *bufa_ptr;
5319     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5320     b_otha   = b_oth->a;
5321   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"scall must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5322 
5323   /* a-array */
5324   /*---------*/
5325   /*  post receives of a-array */
5326   for (i=0; i<nrecvs; i++) {
5327     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5328     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5329   }
5330 
5331   /* pack the outgoing message a-array */
5332   k = 0;
5333   for (i=0; i<nsends; i++) {
5334     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5335     bufA  = bufa+sstartsj[i];
5336     for (j=0; j<nrows; j++) {
5337       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5338       for (ll=0; ll<sbs; ll++) {
5339         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5340         for (l=0; l<ncols; l++) {
5341           *bufA++ = vals[l];
5342         }
5343         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5344       }
5345     }
5346     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5347   }
5348   /* recvs and sends of a-array are completed */
5349   i = nrecvs;
5350   while (i--) {
5351     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5352   }
5353   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5354   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5355 
5356   if (scall == MAT_INITIAL_MATRIX) {
5357     /* put together the new matrix */
5358     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5359 
5360     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5361     /* Since these are PETSc arrays, change flags to free them as necessary. */
5362     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5363     b_oth->free_a  = PETSC_TRUE;
5364     b_oth->free_ij = PETSC_TRUE;
5365     b_oth->nonew   = 0;
5366 
5367     ierr = PetscFree(bufj);CHKERRQ(ierr);
5368     if (!startsj_s || !bufa_ptr) {
5369       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5370       ierr = PetscFree(bufa);CHKERRQ(ierr);
5371     } else {
5372       *startsj_s = sstartsj;
5373       *startsj_r = rstartsj;
5374       *bufa_ptr  = bufa;
5375     }
5376   }
5377   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5378   PetscFunctionReturn(0);
5379 }
5380 
5381 /*@C
5382   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5383 
5384   Not Collective
5385 
5386   Input Parameters:
5387 . A - The matrix in mpiaij format
5388 
5389   Output Parameter:
5390 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5391 . colmap - A map from global column index to local index into lvec
5392 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5393 
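  Example usage (a hedged sketch, not taken from a PETSc example: all three outputs are borrowed
  references owned by the matrix, so the caller must not destroy them):
.vb
      Vec        lvec;
      VecScatter scat;
#if defined(PETSC_USE_CTABLE)
      PetscTable colmap;
#else
      PetscInt   *colmap;
#endif
      ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&scat);CHKERRQ(ierr);
.ve
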
5394   Level: developer
5395 
5396 @*/
5397 #if defined(PETSC_USE_CTABLE)
5398 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5399 #else
5400 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5401 #endif
5402 {
5403   Mat_MPIAIJ *a;
5404 
5405   PetscFunctionBegin;
5406   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5407   PetscValidPointer(lvec, 2);
5408   PetscValidPointer(colmap, 3);
5409   PetscValidPointer(multScatter, 4);
5410   a = (Mat_MPIAIJ*) A->data;
5411   if (lvec) *lvec = a->lvec;
5412   if (colmap) *colmap = a->colmap;
5413   if (multScatter) *multScatter = a->Mvctx;
5414   PetscFunctionReturn(0);
5415 }
5416 
5417 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5418 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5419 #if defined(PETSC_HAVE_MKL_SPARSE)
5420 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5421 #endif
5422 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5423 #if defined(PETSC_HAVE_ELEMENTAL)
5424 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5425 #endif
5426 #if defined(PETSC_HAVE_HYPRE)
5427 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5428 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5429 #endif
5430 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*);
5431 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5432 
5433 /*
5434     Computes (B'*A')' since computing A*B directly is untenable
5435 
5436                n                       p                          p
5437         (              )       (              )         (                  )
5438       m (      A       )  *  n (       B      )   =   m (         C        )
5439         (              )       (              )         (                  )
5440 
5441 */
5442 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5443 {
5444   PetscErrorCode ierr;
5445   Mat            At,Bt,Ct;
5446 
5447   PetscFunctionBegin;
5448   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5449   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5450   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5451   ierr = MatDestroy(&At);CHKERRQ(ierr);
5452   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5453   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5454   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5455   PetscFunctionReturn(0);
5456 }
5457 
5458 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5459 {
5460   PetscErrorCode ierr;
5461   PetscInt       m=A->rmap->n,n=B->cmap->n;
5462   Mat            Cmat;
5463 
5464   PetscFunctionBegin;
5465   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5466   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5467   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5468   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5469   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5470   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5471   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5472   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5473 
5474   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5475 
5476   *C = Cmat;
5477   PetscFunctionReturn(0);
5478 }
5479 
5480 /* ----------------------------------------------------------------*/
5481 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5482 {
5483   PetscErrorCode ierr;
5484 
5485   PetscFunctionBegin;
5486   if (scall == MAT_INITIAL_MATRIX) {
5487     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5488     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5489     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5490   }
5491   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5492   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5493   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5494   PetscFunctionReturn(0);
5495 }
5496 
5497 /*MC
5498    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5499 
5500    Options Database Keys:
5501 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5502 
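  Example usage (a minimal sketch of the standard creation sequence; M and N are illustrative
  global sizes and the preallocation numbers are placeholders; -mat_type mpiaij at the
  MatSetFromOptions() step selects this type):
.vb
      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
.ve
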
5503   Level: beginner
5504 
5505 .seealso: MatCreateAIJ()
5506 M*/
5507 
5508 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5509 {
5510   Mat_MPIAIJ     *b;
5511   PetscErrorCode ierr;
5512   PetscMPIInt    size;
5513 
5514   PetscFunctionBegin;
5515   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5516 
5517   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5518   B->data       = (void*)b;
5519   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5520   B->assembled  = PETSC_FALSE;
5521   B->insertmode = NOT_SET_VALUES;
5522   b->size       = size;
5523 
5524   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5525 
5526   /* build cache for off array entries formed */
5527   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5528 
5529   b->donotstash  = PETSC_FALSE;
5530   b->colmap      = 0;
5531   b->garray      = 0;
5532   b->roworiented = PETSC_TRUE;
5533 
5534   /* stuff used for matrix vector multiply */
5535   b->lvec  = NULL;
5536   b->Mvctx = NULL;
5537 
5538   /* stuff for MatGetRow() */
5539   b->rowindices   = 0;
5540   b->rowvalues    = 0;
5541   b->getrowactive = PETSC_FALSE;
5542 
5543   /* flexible pointer used in CUSP/CUSPARSE classes */
5544   b->spptr = NULL;
5545 
5546   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5547   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5548   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5549   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5550   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5551   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5552   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5553   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5554   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5555 #if defined(PETSC_HAVE_MKL_SPARSE)
5556   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5557 #endif
5558   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5559   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5560 #if defined(PETSC_HAVE_ELEMENTAL)
5561   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5562 #endif
5563 #if defined(PETSC_HAVE_HYPRE)
5564   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5565 #endif
5566   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr);
5567   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5568   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5569   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5570   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5571 #if defined(PETSC_HAVE_HYPRE)
5572   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5573 #endif
5574   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5575   PetscFunctionReturn(0);
5576 }
5577 
5578 /*@C
5579      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5580          and "off-diagonal" part of the matrix in CSR format.
5581 
5582    Collective on MPI_Comm
5583 
5584    Input Parameters:
5585 +  comm - MPI communicator
5586 .  m - number of local rows (Cannot be PETSC_DECIDE)
5587 .  n - number of local columns; this should be the same as the local size used in creating the
5588        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5589        calculated if N is given). For square matrices n is almost always m.
5590 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5591 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5592 .  i - row indices for "diagonal" portion of matrix
5593 .  j - column indices for "diagonal" portion of matrix
5594 .  a - matrix values for "diagonal" portion of matrix
5595 .  oi - row indices for "off-diagonal" portion of matrix
5596 .  oj - column indices for "off-diagonal" portion of matrix
5597 -  oa - matrix values for "off-diagonal" portion of matrix
5598 
5599    Output Parameter:
5600 .   mat - the matrix
5601 
5602    Level: advanced
5603 
5604    Notes:
5605        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5606        must free all six arrays once the matrix has been destroyed and not before.
5607 
5608        The i and j indices are 0 based
5609 
5610        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5611 
5612        This sets local rows and cannot be used to set off-processor values.
5613 
5614        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5615        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5616        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5617        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5618        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5619        communication if it is known that only local entries will be set.
5620 
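       Example usage (a hedged sketch, not taken from a PETSc example, for one rank of a two-process
       run where each process owns one row of the 2x2 matrix [1 2; 3 4]; following the implementation
       below, j holds local column indices within the "diagonal" block while oj holds global column
       indices, rank comes from MPI_Comm_rank(), and all six arrays must stay valid until the matrix
       is destroyed):
.vb
      PetscInt    i[]  = {0,1},j[]  = {0};
      PetscInt    oi[] = {0,1},oj[] = {1 - rank};
      PetscScalar a[]  = {rank ? 4.0 : 1.0},oa[] = {rank ? 3.0 : 2.0};
      Mat         A;
      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,2,2,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve
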
5621 .keywords: matrix, aij, compressed row, sparse, parallel
5622 
5623 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5624           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5625 @*/
5626 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5627 {
5628   PetscErrorCode ierr;
5629   Mat_MPIAIJ     *maij;
5630 
5631   PetscFunctionBegin;
5632   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5633   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5634   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5635   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5636   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5637   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5638   maij = (Mat_MPIAIJ*) (*mat)->data;
5639 
5640   (*mat)->preallocated = PETSC_TRUE;
5641 
5642   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5643   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5644 
5645   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5646   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5647 
5648   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5649   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5650   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5651   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5652 
5653   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5654   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5655   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5656   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5657   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5658   PetscFunctionReturn(0);
5659 }
5660 
5661 /*
5662     Special version for direct calls from Fortran
5663 */
5664 #include <petsc/private/fortranimpl.h>
5665 
5666 /* Change these macros so they can be used in a void function */
5667 #undef CHKERRQ
5668 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5669 #undef SETERRQ2
5670 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5671 #undef SETERRQ3
5672 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5673 #undef SETERRQ
5674 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5675 
5676 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5677 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5678 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5679 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5680 #else
5681 #endif
5682 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5683 {
5684   Mat            mat  = *mmat;
5685   PetscInt       m    = *mm, n = *mn;
5686   InsertMode     addv = *maddv;
5687   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5688   PetscScalar    value;
5689   PetscErrorCode ierr;
5690 
5691   MatCheckPreallocated(mat,1);
5692   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5693 
5694 #if defined(PETSC_USE_DEBUG)
5695   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5696 #endif
5697   {
5698     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5699     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5700     PetscBool roworiented = aij->roworiented;
5701 
5702     /* Some variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros */
5703     Mat        A                 = aij->A;
5704     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5705     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5706     MatScalar  *aa               = a->a;
5707     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5708     Mat        B                 = aij->B;
5709     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5710     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5711     MatScalar  *ba               = b->a;
5712 
5713     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5714     PetscInt  nonew = a->nonew;
5715     MatScalar *ap1,*ap2;
5716 
5717     PetscFunctionBegin;
5718     for (i=0; i<m; i++) {
5719       if (im[i] < 0) continue;
5720 #if defined(PETSC_USE_DEBUG)
5721       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5722 #endif
5723       if (im[i] >= rstart && im[i] < rend) {
5724         row      = im[i] - rstart;
5725         lastcol1 = -1;
5726         rp1      = aj + ai[row];
5727         ap1      = aa + ai[row];
5728         rmax1    = aimax[row];
5729         nrow1    = ailen[row];
5730         low1     = 0;
5731         high1    = nrow1;
5732         lastcol2 = -1;
5733         rp2      = bj + bi[row];
5734         ap2      = ba + bi[row];
5735         rmax2    = bimax[row];
5736         nrow2    = bilen[row];
5737         low2     = 0;
5738         high2    = nrow2;
5739 
5740         for (j=0; j<n; j++) {
5741           if (roworiented) value = v[i*n+j];
5742           else value = v[i+j*m];
5743           if (in[j] >= cstart && in[j] < cend) {
5744             col = in[j] - cstart;
5745             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5746             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5747           } else if (in[j] < 0) continue;
5748 #if defined(PETSC_USE_DEBUG)
5749           /* the extra braces around SETERRQ2() are required when configured --with-errorchecking=0, because of the 'else' clause that follows */
5750           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5751 #endif
5752           else {
5753             if (mat->was_assembled) {
5754               if (!aij->colmap) {
5755                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5756               }
5757 #if defined(PETSC_USE_CTABLE)
5758               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5759               col--;
5760 #else
5761               col = aij->colmap[in[j]] - 1;
5762 #endif
5763               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5764               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5765                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5766                 col  =  in[j];
5767                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5768                 B     = aij->B;
5769                 b     = (Mat_SeqAIJ*)B->data;
5770                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5771                 rp2   = bj + bi[row];
5772                 ap2   = ba + bi[row];
5773                 rmax2 = bimax[row];
5774                 nrow2 = bilen[row];
5775                 low2  = 0;
5776                 high2 = nrow2;
5777                 bm    = aij->B->rmap->n;
5778                 ba    = b->a;
5779               }
5780             } else col = in[j];
5781             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5782           }
5783         }
5784       } else if (!aij->donotstash) {
5785         if (roworiented) {
5786           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5787         } else {
5788           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5789         }
5790       }
5791     }
5792   }
5793   PetscFunctionReturnVoid();
5794 }
5795 
5796