xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 580bdb303e1ee3b1222b2042810b4c26340259c6)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/vecscatterimpl.h>
6 #include <petsc/private/isimpl.h>
7 #include <petscblaslapack.h>
8 #include <petscsf.h>
9 
10 /*MC
11    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
12 
13    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
14    and MATMPIAIJ otherwise.  As a result, for single process communicators,
15   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
16   for communicators controlling multiple processes.  It is recommended that you call both of
17   the above preallocation routines for simplicity.
18 
19    Options Database Keys:
20 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
21 
22   Developer Notes:
23     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL. The type also automatically switches over to use inodes when
24    enough exist.
25 
26   Level: beginner
27 
28 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
29 M*/
30 
31 /*MC
32    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
33 
34    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
35    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
36    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
37   for communicators controlling multiple processes.  It is recommended that you call both of
38   the above preallocation routines for simplicity.
39 
40    Options Database Keys:
41 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
42 
43   Level: beginner
44 
45 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
46 M*/
47 
48 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
49 {
50   PetscErrorCode ierr;
51   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
52 
53   PetscFunctionBegin;
54   if (mat->A) {
55     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
56     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
57   }
58   PetscFunctionReturn(0);
59 }
60 
61 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
62 {
63   PetscErrorCode  ierr;
64   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
65   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
66   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
67   const PetscInt  *ia,*ib;
68   const MatScalar *aa,*bb;
69   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
70   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
71 
72   PetscFunctionBegin;
73   *keptrows = 0;
74   ia        = a->i;
75   ib        = b->i;
76   for (i=0; i<m; i++) {
77     na = ia[i+1] - ia[i];
78     nb = ib[i+1] - ib[i];
79     if (!na && !nb) {
80       cnt++;
81       goto ok1;
82     }
83     aa = a->a + ia[i];
84     for (j=0; j<na; j++) {
85       if (aa[j] != 0.0) goto ok1;
86     }
87     bb = b->a + ib[i];
88     for (j=0; j <nb; j++) {
89       if (bb[j] != 0.0) goto ok1;
90     }
91     cnt++;
92 ok1:;
93   }
94   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
95   if (!n0rows) PetscFunctionReturn(0);
96   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
97   cnt  = 0;
98   for (i=0; i<m; i++) {
99     na = ia[i+1] - ia[i];
100     nb = ib[i+1] - ib[i];
101     if (!na && !nb) continue;
102     aa = a->a + ia[i];
103     for (j=0; j<na;j++) {
104       if (aa[j] != 0.0) {
105         rows[cnt++] = rstart + i;
106         goto ok2;
107       }
108     }
109     bb = b->a + ib[i];
110     for (j=0; j<nb; j++) {
111       if (bb[j] != 0.0) {
112         rows[cnt++] = rstart + i;
113         goto ok2;
114       }
115     }
116 ok2:;
117   }
118   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
119   PetscFunctionReturn(0);
120 }
121 
122 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
123 {
124   PetscErrorCode    ierr;
125   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
126   PetscBool         cong;
127 
128   PetscFunctionBegin;
129   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
130   if (Y->assembled && cong) {
131     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
132   } else {
133     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
134   }
135   PetscFunctionReturn(0);
136 }
137 
138 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
139 {
140   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
141   PetscErrorCode ierr;
142   PetscInt       i,rstart,nrows,*rows;
143 
144   PetscFunctionBegin;
145   *zrows = NULL;
146   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
147   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
148   for (i=0; i<nrows; i++) rows[i] += rstart;
149   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
150   PetscFunctionReturn(0);
151 }
152 
153 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
154 {
155   PetscErrorCode ierr;
156   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
157   PetscInt       i,n,*garray = aij->garray;
158   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
159   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
160   PetscReal      *work;
161 
162   PetscFunctionBegin;
163   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
164   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
165   if (type == NORM_2) {
166     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
167       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
168     }
169     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
170       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
171     }
172   } else if (type == NORM_1) {
173     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
174       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
175     }
176     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
177       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
178     }
179   } else if (type == NORM_INFINITY) {
180     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
181       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
182     }
183     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
184       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
185     }
186 
187   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
188   if (type == NORM_INFINITY) {
189     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
190   } else {
191     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
192   }
193   ierr = PetscFree(work);CHKERRQ(ierr);
194   if (type == NORM_2) {
195     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
196   }
197   PetscFunctionReturn(0);
198 }
199 
200 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
201 {
202   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
203   IS              sis,gis;
204   PetscErrorCode  ierr;
205   const PetscInt  *isis,*igis;
206   PetscInt        n,*iis,nsis,ngis,rstart,i;
207 
208   PetscFunctionBegin;
209   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
210   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
211   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
212   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
213   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
214   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
215 
216   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
217   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
218   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
219   n    = ngis + nsis;
220   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
221   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
222   for (i=0; i<n; i++) iis[i] += rstart;
223   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
224 
225   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
226   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
227   ierr = ISDestroy(&sis);CHKERRQ(ierr);
228   ierr = ISDestroy(&gis);CHKERRQ(ierr);
229   PetscFunctionReturn(0);
230 }
231 
232 /*
233     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
234     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
235 
236     Only for square matrices
237 
238     Used by a preconditioner, hence PETSC_EXTERN
239 */
/*
   MatDistribute_MPIAIJ - Spreads the rows of a sequential AIJ matrix held on rank 0 over comm.

   Input Parameters:
+  comm  - communicator the distributed matrix lives on
.  gmat  - the matrix to distribute; on rank 0 it must be of type MATSEQAIJ (checked below) and its
           row lengths, column indices and values are read only on rank 0
.  m     - number of rows (and of columns) this rank owns in the distributed matrix
-  reuse - MAT_INITIAL_MATRIX creates a new matrix; any other value reuses *inmat and only
           transfers numerical values

   Output Parameter:
.  inmat - the distributed matrix (created here for MAT_INITIAL_MATRIX, otherwise supplied by the caller)

   Notes:
   Collective on comm (MPI_Bcast/MPI_Allgather plus point-to-point messages from rank 0).
   In the initial call the per-row counts ld[] are attached to the Mat_MPIAIJ data of the new
   matrix; the reuse path reads them back to split each received row of values.
*/
240 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
241 {
242   PetscMPIInt    rank,size;
243   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
244   PetscErrorCode ierr;
245   Mat            mat;
246   Mat_SeqAIJ     *gmata;
247   PetscMPIInt    tag;
248   MPI_Status     status;
249   PetscBool      aij;
250   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
251 
252   PetscFunctionBegin;
253   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
254   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
255   if (!rank) {
256     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
257     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
258   }
259   if (reuse == MAT_INITIAL_MATRIX) {
260     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
261     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
        /* NOTE(review): gmat is queried on every rank here, although only rank 0's block sizes
           survive the broadcast -- confirm callers pass a valid Mat on all ranks */
262     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
263     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
264     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
265     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
266     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
267     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
        /* rowners[1..size] receives every rank's m; the loop below turns it into prefix sums,
           so that rank r owns global rows [rowners[r],rowners[r+1]) */
268     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
269 
270     rowners[0] = 0;
271     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
272     rstart = rowners[rank];
273     rend   = rowners[rank+1];
274     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
275     if (!rank) {
276       gmata = (Mat_SeqAIJ*) gmat->data;
277       /* send row lengths to all processors */
278       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
279       for (i=1; i<size; i++) {
280         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
281       }
282       /* determine number diagonal and off-diagonal counts */
          /* ld[i]    = entries of local row i with global column < rstart
             olens[i] = entries of local row i with global column outside [rstart,rend) */
283       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
284       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
285       jj   = 0;
286       for (i=0; i<m; i++) {
287         for (j=0; j<dlens[i]; j++) {
288           if (gmata->j[jj] < rstart) ld[i]++;
289           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
290           jj++;
291         }
292       }
293       /* send column indices to other processes */
294       for (i=1; i<size; i++) {
295         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
296         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
297         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
298       }
299 
300       /* send numerical values to other processes */
301       for (i=1; i<size; i++) {
302         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
303         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
304       }
          /* rank 0 inserts its own rows straight from gmat's arrays (nothing to free later) */
305       gmataa = gmata->a;
306       gmataj = gmata->j;
307 
308     } else {
          /* the receives below mirror rank 0's send order: row lengths, nz, column indices, values */
309       /* receive row lengths */
310       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
311       /* receive column indices */
312       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
313       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
314       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
315       /* determine number diagonal and off-diagonal counts */
316       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
317       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
318       jj   = 0;
319       for (i=0; i<m; i++) {
320         for (j=0; j<dlens[i]; j++) {
321           if (gmataj[jj] < rstart) ld[i]++;
322           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
323           jj++;
324         }
325       }
326       /* receive numerical values */
327       ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
328       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
329     }
330     /* set preallocation */
        /* dlens[] holds full row lengths; subtract the off-diagonal counts so it holds
           diagonal-block counts for the preallocation calls */
331     for (i=0; i<m; i++) {
332       dlens[i] -= olens[i];
333     }
334     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
335     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
336 
        /* restore full row lengths: each row is inserted below with a single MatSetValues() call */
337     for (i=0; i<m; i++) {
338       dlens[i] += olens[i];
339     }
340     cnt = 0;
341     for (i=0; i<m; i++) {
342       row  = rstart + i;
343       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
344       cnt += dlens[i];
345     }
346     if (rank) {
347       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
348     }
349     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
350     ierr = PetscFree(rowners);CHKERRQ(ierr);
351 
        /* keep ld[] with the matrix; the reuse branch below reads it back.
           NOTE(review): this cast assumes MATAIJ resolved to the MPIAIJ implementation -- confirm
           the routine is never used with a single-process communicator */
352     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
353 
354     *inmat = mat;
355   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
356     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
357     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
358     mat  = *inmat;
359     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
360     if (!rank) {
361       /* send numerical values to other processes */
362       gmata  = (Mat_SeqAIJ*) gmat->data;
363       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
364       gmataa = gmata->a;
365       for (i=1; i<size; i++) {
366         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
367         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
368       }
369       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
370     } else {
371       /* receive numerical values from process 0*/
372       nz   = Ad->nz + Ao->nz;
          /* gmataa is advanced by the copy loop below, so the allocation is remembered for PetscFree() */
373       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
374       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
375     }
376     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
        /* gmataa is consumed front to back. For each row the copy takes ld[i] values for B,
           then the row's A values, then the rest of the row's B values; the trailing B values of
           row i-1 and the leading B values of row i are adjacent in both arrays and are copied
           together. Presumably each row's values are ordered by global column -- the split
           relies on it; verify against the sender. */
377     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
378     ad = Ad->a;
379     ao = Ao->a;
380     if (mat->rmap->n) {
381       i  = 0;
382       nz = ld[i];                                   ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
383       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
384     }
385     for (i=1; i<mat->rmap->n; i++) {
386       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
387       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
388     }
        /* step back to the last row index; the remaining B values of that row are still pending */
389     i--;
390     if (mat->rmap->n) {
391       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
392     }
393     if (rank) {
394       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
395     }
396   }
397   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
398   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
399   PetscFunctionReturn(0);
400 }
401 
402 /*
403   Local utility routine that creates a mapping from the global column
404 number to the local number in the off-diagonal part of the local
405 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
406 a slightly higher hash table cost; without it it is not scalable (each processor
407 has an order N integer array but is fast to acess.
408 */
409 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
410 {
411   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
412   PetscErrorCode ierr;
413   PetscInt       n = aij->B->cmap->n,i;
414 
415   PetscFunctionBegin;
416   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
417 #if defined(PETSC_USE_CTABLE)
418   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
419   for (i=0; i<n; i++) {
420     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
421   }
422 #else
423   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
424   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
425   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
426 #endif
427   PetscFunctionReturn(0);
428 }
429 
/*
   MatSetValues_SeqAIJ_A_Private - Inserts or adds one value into the current row of the
   sequential matrix A, using state kept in the expanding function.

   Arguments: row,col are indices local to A; value is the entry; addv selects ADD_VALUES versus
   overwrite; orow,ocol are only used in the error message.

   Names that must exist in the expanding scope: rp1 (column indices of the row), ap1 (values of
   the row), nrow1 (current row length), rmax1, low1/high1/lastcol1 (search window carried from
   the previous call), t, _i, N, ierr, nonew, ignorezeroentries, and A, a, am, aa, ai, aj, aimax,
   ailen. MatSeqXAIJReallocateAIJ() is defined elsewhere and may require further locals -- verify
   before reusing the macro.

   Steps:
   1. Narrow [low1,high1) by bisection until at most 5 candidates remain, then scan linearly.
      If col is not larger than the previous column the window restarts at 0, otherwise its
      upper end is reset to nrow1.
   2. If col is found, add to or overwrite the stored value and finish.
   3. Otherwise the entry is skipped when value is 0.0 with ignorezeroentries set and row != col,
      or when nonew == 1; nonew == -1 raises an error.
   4. Otherwise the row is (possibly) reallocated, later entries are shifted up by one, the new
      entry is stored at position _i, and a->nz and A->nonzerostate are incremented.
   ailen[row] is always updated to nrow1 at the end.

   The expansion defines the label a_noinsert, so it can appear only once per function.
*/
430 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
431 { \
432     if (col <= lastcol1)  low1 = 0;     \
433     else                 high1 = nrow1; \
434     lastcol1 = col;\
435     while (high1-low1 > 5) { \
436       t = (low1+high1)/2; \
437       if (rp1[t] > col) high1 = t; \
438       else              low1  = t; \
439     } \
440       for (_i=low1; _i<high1; _i++) { \
441         if (rp1[_i] > col) break; \
442         if (rp1[_i] == col) { \
443           if (addv == ADD_VALUES) { \
444             ap1[_i] += value;   \
445             /* Not sure LogFlops will slow down the code or not */ \
446             (void)PetscLogFlops(1.0);   \
447            } \
448           else                    ap1[_i] = value; \
449           goto a_noinsert; \
450         } \
451       }  \
452       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
453       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
454       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
455       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
456       N = nrow1++ - 1; a->nz++; high1++; \
457       /* shift up all the later entries in this row */ \
458       ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
459       ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
460       rp1[_i] = col;  \
461       ap1[_i] = value;  \
462       A->nonzerostate++;\
463       a_noinsert: ; \
464       ailen[row] = nrow1; \
465 }
466 
/*
   MatSetValues_SeqAIJ_B_Private - Inserts or adds one value into the current row of the
   sequential matrix B, using state kept in the expanding function.

   Arguments: row,col are the indices used inside B; value is the entry; addv selects ADD_VALUES
   versus overwrite; orow,ocol are only used in the error message.

   Names that must exist in the expanding scope: rp2 (column indices of the row), ap2 (values of
   the row), nrow2 (current row length), rmax2, low2/high2/lastcol2 (search window carried from
   the previous call), t, _i, N, ierr, nonew, ignorezeroentries, and B, b, bm, ba, bi, bj, bimax,
   bilen. MatSeqXAIJReallocateAIJ() is defined elsewhere and may require further locals -- verify
   before reusing the macro.

   The search (bisection down to a window of at most 5, then a linear scan) and the insertion
   (optional reallocation, shift of later entries, increment of b->nz and B->nonzerostate) are
   spelled out below. A value of 0.0 is skipped whenever ignorezeroentries is set -- there is no
   row != col exception in this macro. nonew == 1 skips a new location and nonew == -1 raises an
   error. bilen[row] is always updated to nrow2 at the end.

   The expansion defines the label b_noinsert, so it can appear only once per function.
*/
467 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
468   { \
469     if (col <= lastcol2) low2 = 0;                        \
470     else high2 = nrow2;                                   \
471     lastcol2 = col;                                       \
472     while (high2-low2 > 5) {                              \
473       t = (low2+high2)/2;                                 \
474       if (rp2[t] > col) high2 = t;                        \
475       else             low2  = t;                         \
476     }                                                     \
477     for (_i=low2; _i<high2; _i++) {                       \
478       if (rp2[_i] > col) break;                           \
479       if (rp2[_i] == col) {                               \
480         if (addv == ADD_VALUES) {                         \
481           ap2[_i] += value;                               \
482           (void)PetscLogFlops(1.0);                       \
483         }                                                 \
484         else                    ap2[_i] = value;          \
485         goto b_noinsert;                                  \
486       }                                                   \
487     }                                                     \
488     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
489     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
490     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
491     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
492     N = nrow2++ - 1; b->nz++; high2++;                    \
493     /* shift up all the later entries in this row */      \
494     ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
495     ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
496     rp2[_i] = col;                                        \
497     ap2[_i] = value;                                      \
498     B->nonzerostate++;                                    \
499     b_noinsert: ;                                         \
500     bilen[row] = nrow2;                                   \
501   }
502 
503 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
504 {
505   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
506   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
507   PetscErrorCode ierr;
508   PetscInt       l,*garray = mat->garray,diag;
509 
510   PetscFunctionBegin;
511   /* code only works for square matrices A */
512 
513   /* find size of row to the left of the diagonal part */
514   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
515   row  = row - diag;
516   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
517     if (garray[b->j[b->i[row]+l]] > diag) break;
518   }
519   ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);
520 
521   /* diagonal part */
522   ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);
523 
524   /* right of diagonal part */
525   ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
526   PetscFunctionReturn(0);
527 }
528 
/*
   MatSetValues_MPIAIJ - Inserts or adds an m-by-n block of values into an MPIAIJ matrix.

   Input Parameters:
+  mat  - the matrix
.  m,im - number of rows and their global indices; negative row indices are skipped
.  n,in - number of columns and their global indices; negative column indices are skipped
.  v    - the values, read as v[i*n+j] when the matrix is row oriented and as v[i+j*m] otherwise
-  addv - ADD_VALUES adds to existing entries, anything else overwrites them (see the macros)

   Notes:
   Rows in [rstart,rend) are handled locally: columns in [cstart,cend) go to the diagonal part A
   (with the column shifted by cstart), all other columns go to the off-diagonal part B.
   Rows owned elsewhere are placed in mat->stash unless aij->donotstash is set; they raise an
   error when mat->nooffprocentries is set.

   The many locals declared below are not all used directly: they are the names that
   MatSetValues_SeqAIJ_A_Private() / MatSetValues_SeqAIJ_B_Private() (and the reallocation macro
   they invoke) expect to find in scope. Do not rename or remove them without checking the macros.
*/
529 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
530 {
531   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
532   PetscScalar    value;
533   PetscErrorCode ierr;
534   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
535   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
536   PetscBool      roworiented = aij->roworiented;
537 
538   /* Some Variables required in the macro */
539   Mat        A                 = aij->A;
540   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
541   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
542   MatScalar  *aa               = a->a;
543   PetscBool  ignorezeroentries = a->ignorezeroentries;
544   Mat        B                 = aij->B;
545   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
546   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
547   MatScalar  *ba               = b->a;
548 
549   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
550   PetscInt  nonew;
551   MatScalar *ap1,*ap2;
552 
553   PetscFunctionBegin;
554   for (i=0; i<m; i++) {
555     if (im[i] < 0) continue;
556 #if defined(PETSC_USE_DEBUG)
557     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
558 #endif
559     if (im[i] >= rstart && im[i] < rend) {
          /* locally owned row: set up the per-row search state of both macros
             (suffix 1 = diagonal part A, suffix 2 = off-diagonal part B) */
560       row      = im[i] - rstart;
561       lastcol1 = -1;
562       rp1      = aj + ai[row];
563       ap1      = aa + ai[row];
564       rmax1    = aimax[row];
565       nrow1    = ailen[row];
566       low1     = 0;
567       high1    = nrow1;
568       lastcol2 = -1;
569       rp2      = bj + bi[row];
570       ap2      = ba + bi[row];
571       rmax2    = bimax[row];
572       nrow2    = bilen[row];
573       low2     = 0;
574       high2    = nrow2;
575 
576       for (j=0; j<n; j++) {
577         if (roworiented) value = v[i*n+j];
578         else             value = v[i+j*m];
579         if (in[j] >= cstart && in[j] < cend) {
580           col   = in[j] - cstart;
581           nonew = a->nonew;
              /* adding an explicit zero is a no-op; row and col are both local here, so
                 row != col exempts the diagonal of the local block */
582           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
583           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
584         } else if (in[j] < 0) continue;
585 #if defined(PETSC_USE_DEBUG)
586         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
587 #endif
588         else {
589           if (mat->was_assembled) {
                /* after a previous assembly the global column is translated through colmap
                   (built on first use); stored values are index+1, hence the decrement */
590             if (!aij->colmap) {
591               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
592             }
593 #if defined(PETSC_USE_CTABLE)
594             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
595             col--;
596 #else
597             col = aij->colmap[in[j]] - 1;
598 #endif
599             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                  /* column not yet present in B and new nonzeros are allowed: disassemble, use
                     the global column index, and refresh every cached pointer into B */
600               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
601               col  =  in[j];
602               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
603               B     = aij->B;
604               b     = (Mat_SeqAIJ*)B->data;
605               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
606               rp2   = bj + bi[row];
607               ap2   = ba + bi[row];
608               rmax2 = bimax[row];
609               nrow2 = bilen[row];
610               low2  = 0;
611               high2 = nrow2;
612               bm    = aij->B->rmap->n;
613               ba    = b->a;
614             } else if (col < 0) {
                  /* here nonew is nonzero. For nonew == 1 only a message is logged; col stays
                     negative, so the macro below finds no match and its nonew == 1 test skips
                     the insertion */
615               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
616                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
617               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
618             }
619           } else col = in[j];
620           nonew = b->nonew;
621           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
622         }
623       }
624     } else {
          /* row owned by another process */
625       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
626       if (!aij->donotstash) {
627         mat->assembled = PETSC_FALSE;
628         if (roworiented) {
629           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
630         } else {
631           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
632         }
633       }
634     }
635   }
636   PetscFunctionReturn(0);
637 }
638 
639 /*
640     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
641     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
642     No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
643 */
644 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
645 {
646   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
647   Mat            A           = aij->A; /* diagonal part of the matrix */
648   Mat            B           = aij->B; /* offdiagonal part of the matrix */
649   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
650   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
651   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
652   PetscInt       *ailen      = a->ilen,*aj = a->j;
653   PetscInt       *bilen      = b->ilen,*bj = b->j;
654   PetscInt       am          = aij->A->rmap->n,j;
655   PetscInt       diag_so_far = 0,dnz;
656   PetscInt       offd_so_far = 0,onz;
657 
658   PetscFunctionBegin;
659   /* Iterate over all rows of the matrix */
660   for (j=0; j<am; j++) {
661     dnz = onz = 0;
662     /*  Iterate over all non-zero columns of the current row */
663     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
664       /* If column is in the diagonal */
665       if (mat_j[col] >= cstart && mat_j[col] < cend) {
666         aj[diag_so_far++] = mat_j[col] - cstart;
667         dnz++;
668       } else { /* off-diagonal entries */
669         bj[offd_so_far++] = mat_j[col];
670         onz++;
671       }
672     }
673     ailen[j] = dnz;
674     bilen[j] = onz;
675   }
676   PetscFunctionReturn(0);
677 }
678 
679 /*
680     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
681     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
682     No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
683     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
684     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
685 */
686 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
687 {
688   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
689   Mat            A      = aij->A; /* diagonal part of the matrix */
690   Mat            B      = aij->B; /* offdiagonal part of the matrix */
691   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
692   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
693   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
694   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
695   PetscInt       *ailen = a->ilen,*aj = a->j;
696   PetscInt       *bilen = b->ilen,*bj = b->j;
697   PetscInt       am     = aij->A->rmap->n,j;
698   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
699   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
700   PetscScalar    *aa = a->a,*ba = b->a;
701 
702   PetscFunctionBegin;
703   /* Iterate over all rows of the matrix */
704   for (j=0; j<am; j++) {
705     dnz_row = onz_row = 0;
706     rowstart_offd = full_offd_i[j];
707     rowstart_diag = full_diag_i[j];
708     /*  Iterate over all non-zero columns of the current row */
709     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
710       /* If column is in the diagonal */
711       if (mat_j[col] >= cstart && mat_j[col] < cend) {
712         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
713         aa[rowstart_diag+dnz_row] = mat_a[col];
714         dnz_row++;
715       } else { /* off-diagonal entries */
716         bj[rowstart_offd+onz_row] = mat_j[col];
717         ba[rowstart_offd+onz_row] = mat_a[col];
718         onz_row++;
719       }
720     }
721     ailen[j] = dnz_row;
722     bilen[j] = onz_row;
723   }
724   PetscFunctionReturn(0);
725 }
726 
727 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
728 {
729   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
730   PetscErrorCode ierr;
731   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
732   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
733 
734   PetscFunctionBegin;
735   for (i=0; i<m; i++) {
736     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
737     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
738     if (idxm[i] >= rstart && idxm[i] < rend) {
739       row = idxm[i] - rstart;
740       for (j=0; j<n; j++) {
741         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
742         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
743         if (idxn[j] >= cstart && idxn[j] < cend) {
744           col  = idxn[j] - cstart;
745           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
746         } else {
747           if (!aij->colmap) {
748             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
749           }
750 #if defined(PETSC_USE_CTABLE)
751           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
752           col--;
753 #else
754           col = aij->colmap[idxn[j]] - 1;
755 #endif
756           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
757           else {
758             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
759           }
760         }
761       }
762     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
763   }
764   PetscFunctionReturn(0);
765 }
766 
767 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
768 
769 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
770 {
771   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
772   PetscErrorCode ierr;
773   PetscInt       nstash,reallocs;
774 
775   PetscFunctionBegin;
776   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
777 
778   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
779   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
780   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
781   PetscFunctionReturn(0);
782 }
783 
784 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
785 {
786   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
787   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
788   PetscErrorCode ierr;
789   PetscMPIInt    n;
790   PetscInt       i,j,rstart,ncols,flg;
791   PetscInt       *row,*col;
792   PetscBool      other_disassembled;
793   PetscScalar    *val;
794 
795   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
796 
797   PetscFunctionBegin;
798   if (!aij->donotstash && !mat->nooffprocentries) {
799     while (1) {
800       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
801       if (!flg) break;
802 
803       for (i=0; i<n; ) {
804         /* Now identify the consecutive vals belonging to the same row */
805         for (j=i,rstart=row[j]; j<n; j++) {
806           if (row[j] != rstart) break;
807         }
808         if (j < n) ncols = j-i;
809         else       ncols = n-i;
810         /* Now assemble all these values with a single function call */
811         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
812 
813         i = j;
814       }
815     }
816     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
817   }
818   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
819   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
820 
821   /* determine if any processor has disassembled, if so we must
822      also disassemble ourselfs, in order that we may reassemble. */
823   /*
824      if nonzero structure of submatrix B cannot change then we know that
825      no processor disassembled thus we can skip this stuff
826   */
827   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
828     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
829     if (mat->was_assembled && !other_disassembled) {
830       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
831     }
832   }
833   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
834     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
835   }
836   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
837   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
838   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
839 
840   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
841 
842   aij->rowvalues = 0;
843 
844   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
845   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
846 
847   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
848   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
849     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
850     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
851   }
852   PetscFunctionReturn(0);
853 }
854 
855 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
856 {
857   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
858   PetscErrorCode ierr;
859 
860   PetscFunctionBegin;
861   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
862   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
863   PetscFunctionReturn(0);
864 }
865 
866 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
867 {
868   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
869   PetscObjectState sA, sB;
870   PetscInt        *lrows;
871   PetscInt         r, len;
872   PetscBool        cong, lch, gch;
873   PetscErrorCode   ierr;
874 
875   PetscFunctionBegin;
876   /* get locally owned rows */
877   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
878   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
879   /* fix right hand side if needed */
880   if (x && b) {
881     const PetscScalar *xx;
882     PetscScalar       *bb;
883 
884     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
885     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
886     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
887     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
888     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
889     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
890   }
891 
892   sA = mat->A->nonzerostate;
893   sB = mat->B->nonzerostate;
894 
895   if (diag != 0.0 && cong) {
896     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
897     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
898   } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
899     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
900     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
901     PetscInt   nnwA, nnwB;
902     PetscBool  nnzA, nnzB;
903 
904     nnwA = aijA->nonew;
905     nnwB = aijB->nonew;
906     nnzA = aijA->keepnonzeropattern;
907     nnzB = aijB->keepnonzeropattern;
908     if (!nnzA) {
909       ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
910       aijA->nonew = 0;
911     }
912     if (!nnzB) {
913       ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
914       aijB->nonew = 0;
915     }
916     /* Must zero here before the next loop */
917     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
918     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
919     for (r = 0; r < len; ++r) {
920       const PetscInt row = lrows[r] + A->rmap->rstart;
921       if (row >= A->cmap->N) continue;
922       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
923     }
924     aijA->nonew = nnwA;
925     aijB->nonew = nnwB;
926   } else {
927     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
928     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
929   }
930   ierr = PetscFree(lrows);CHKERRQ(ierr);
931   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
932   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
933 
934   /* reduce nonzerostate */
935   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
936   ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
937   if (gch) A->nonzerostate++;
938   PetscFunctionReturn(0);
939 }
940 
941 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
942 {
943   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
944   PetscErrorCode    ierr;
945   PetscMPIInt       n = A->rmap->n;
946   PetscInt          i,j,r,m,p = 0,len = 0;
947   PetscInt          *lrows,*owners = A->rmap->range;
948   PetscSFNode       *rrows;
949   PetscSF           sf;
950   const PetscScalar *xx;
951   PetscScalar       *bb,*mask;
952   Vec               xmask,lmask;
953   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
954   const PetscInt    *aj, *ii,*ridx;
955   PetscScalar       *aa;
956 
957   PetscFunctionBegin;
958   /* Create SF where leaves are input rows and roots are owned rows */
959   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
960   for (r = 0; r < n; ++r) lrows[r] = -1;
961   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
962   for (r = 0; r < N; ++r) {
963     const PetscInt idx   = rows[r];
964     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
965     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
966       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
967     }
968     rrows[r].rank  = p;
969     rrows[r].index = rows[r] - owners[p];
970   }
971   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
972   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
973   /* Collect flags for rows to be zeroed */
974   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
975   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
976   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
977   /* Compress and put in row numbers */
978   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
979   /* zero diagonal part of matrix */
980   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
981   /* handle off diagonal part of matrix */
982   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
983   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
984   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
985   for (i=0; i<len; i++) bb[lrows[i]] = 1;
986   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
987   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
988   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
989   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
990   if (x && b) { /* this code is buggy when the row and column layout don't match */
991     PetscBool cong;
992 
993     ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
994     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
995     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
996     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
997     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
998     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
999   }
1000   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1001   /* remove zeroed rows of off diagonal matrix */
1002   ii = aij->i;
1003   for (i=0; i<len; i++) {
1004     ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
1005   }
1006   /* loop over all elements of off process part of matrix zeroing removed columns*/
1007   if (aij->compressedrow.use) {
1008     m    = aij->compressedrow.nrows;
1009     ii   = aij->compressedrow.i;
1010     ridx = aij->compressedrow.rindex;
1011     for (i=0; i<m; i++) {
1012       n  = ii[i+1] - ii[i];
1013       aj = aij->j + ii[i];
1014       aa = aij->a + ii[i];
1015 
1016       for (j=0; j<n; j++) {
1017         if (PetscAbsScalar(mask[*aj])) {
1018           if (b) bb[*ridx] -= *aa*xx[*aj];
1019           *aa = 0.0;
1020         }
1021         aa++;
1022         aj++;
1023       }
1024       ridx++;
1025     }
1026   } else { /* do not use compressed row format */
1027     m = l->B->rmap->n;
1028     for (i=0; i<m; i++) {
1029       n  = ii[i+1] - ii[i];
1030       aj = aij->j + ii[i];
1031       aa = aij->a + ii[i];
1032       for (j=0; j<n; j++) {
1033         if (PetscAbsScalar(mask[*aj])) {
1034           if (b) bb[i] -= *aa*xx[*aj];
1035           *aa = 0.0;
1036         }
1037         aa++;
1038         aj++;
1039       }
1040     }
1041   }
1042   if (x && b) {
1043     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1044     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1045   }
1046   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1047   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1048   ierr = PetscFree(lrows);CHKERRQ(ierr);
1049 
1050   /* only change matrix nonzero state if pattern was allowed to be changed */
1051   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1052     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1053     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1054   }
1055   PetscFunctionReturn(0);
1056 }
1057 
1058 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1059 {
1060   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1061   PetscErrorCode ierr;
1062   PetscInt       nt;
1063   VecScatter     Mvctx = a->Mvctx;
1064 
1065   PetscFunctionBegin;
1066   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1067   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1068 
1069   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1070   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1071   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1072   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1073   PetscFunctionReturn(0);
1074 }
1075 
1076 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1077 {
1078   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1079   PetscErrorCode ierr;
1080 
1081   PetscFunctionBegin;
1082   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1083   PetscFunctionReturn(0);
1084 }
1085 
1086 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1087 {
1088   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1089   PetscErrorCode ierr;
1090   VecScatter     Mvctx = a->Mvctx;
1091 
1092   PetscFunctionBegin;
1093   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1094   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1095   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1096   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1097   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1098   PetscFunctionReturn(0);
1099 }
1100 
1101 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1102 {
1103   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1104   PetscErrorCode ierr;
1105 
1106   PetscFunctionBegin;
1107   /* do nondiagonal part */
1108   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1109   /* do local part */
1110   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1111   /* add partial results together */
1112   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1113   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1114   PetscFunctionReturn(0);
1115 }
1116 
1117 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1118 {
1119   MPI_Comm       comm;
1120   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1121   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1122   IS             Me,Notme;
1123   PetscErrorCode ierr;
1124   PetscInt       M,N,first,last,*notme,i;
1125   PetscBool      lf;
1126   PetscMPIInt    size;
1127 
1128   PetscFunctionBegin;
1129   /* Easy test: symmetric diagonal block */
1130   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1131   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1132   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
1133   if (!*f) PetscFunctionReturn(0);
1134   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1135   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1136   if (size == 1) PetscFunctionReturn(0);
1137 
1138   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1139   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1140   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1141   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1142   for (i=0; i<first; i++) notme[i] = i;
1143   for (i=last; i<M; i++) notme[i-last+first] = i;
1144   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1145   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1146   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1147   Aoff = Aoffs[0];
1148   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1149   Boff = Boffs[0];
1150   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1151   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1152   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1153   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1154   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1155   ierr = PetscFree(notme);CHKERRQ(ierr);
1156   PetscFunctionReturn(0);
1157 }
1158 
1159 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1160 {
1161   PetscErrorCode ierr;
1162 
1163   PetscFunctionBegin;
1164   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1165   PetscFunctionReturn(0);
1166 }
1167 
1168 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1169 {
1170   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1171   PetscErrorCode ierr;
1172 
1173   PetscFunctionBegin;
1174   /* do nondiagonal part */
1175   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1176   /* do local part */
1177   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1178   /* add partial results together */
1179   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1180   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1181   PetscFunctionReturn(0);
1182 }
1183 
1184 /*
1185   This only works correctly for square matrices where the subblock A->A is the
1186    diagonal block
1187 */
1188 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1189 {
1190   PetscErrorCode ierr;
1191   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1192 
1193   PetscFunctionBegin;
1194   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1195   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1196   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1197   PetscFunctionReturn(0);
1198 }
1199 
1200 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1201 {
1202   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1203   PetscErrorCode ierr;
1204 
1205   PetscFunctionBegin;
1206   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1207   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1208   PetscFunctionReturn(0);
1209 }
1210 
1211 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1212 {
1213   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1214   PetscErrorCode ierr;
1215 
1216   PetscFunctionBegin;
1217 #if defined(PETSC_USE_LOG)
1218   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1219 #endif
1220   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1221   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1222   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1223   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1224 #if defined(PETSC_USE_CTABLE)
1225   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1226 #else
1227   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1228 #endif
1229   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1230   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1231   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1232   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1233   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1234   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1235   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1236 
1237   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1238   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1239   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1240   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1241   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1242   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1243   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1244   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1245   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1246 #if defined(PETSC_HAVE_ELEMENTAL)
1247   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1248 #endif
1249 #if defined(PETSC_HAVE_HYPRE)
1250   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1251   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1252 #endif
1253   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1254   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1259 {
1260   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1261   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1262   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1263   PetscErrorCode ierr;
1264   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1265   int            fd;
1266   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1267   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1268   PetscScalar    *column_values;
1269   PetscInt       message_count,flowcontrolcount;
1270   FILE           *file;
1271 
1272   PetscFunctionBegin;
1273   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1274   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1275   nz   = A->nz + B->nz;
1276   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1277   if (!rank) {
1278     header[0] = MAT_FILE_CLASSID;
1279     header[1] = mat->rmap->N;
1280     header[2] = mat->cmap->N;
1281 
1282     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1283     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1284     /* get largest number of rows any processor has */
1285     rlen  = mat->rmap->n;
1286     range = mat->rmap->range;
1287     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1288   } else {
1289     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1290     rlen = mat->rmap->n;
1291   }
1292 
1293   /* load up the local row counts */
1294   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1295   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1296 
1297   /* store the row lengths to the file */
1298   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1299   if (!rank) {
1300     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1301     for (i=1; i<size; i++) {
1302       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1303       rlen = range[i+1] - range[i];
1304       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1305       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1306     }
1307     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1308   } else {
1309     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1310     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1311     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1312   }
1313   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1314 
1315   /* load up the local column indices */
1316   nzmax = nz; /* th processor needs space a largest processor needs */
1317   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1318   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1319   cnt   = 0;
1320   for (i=0; i<mat->rmap->n; i++) {
1321     for (j=B->i[i]; j<B->i[i+1]; j++) {
1322       if ((col = garray[B->j[j]]) > cstart) break;
1323       column_indices[cnt++] = col;
1324     }
1325     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1326     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1327   }
1328   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1329 
1330   /* store the column indices to the file */
1331   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1332   if (!rank) {
1333     MPI_Status status;
1334     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1335     for (i=1; i<size; i++) {
1336       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1337       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1338       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1339       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1340       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1341     }
1342     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1343   } else {
1344     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1345     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1346     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1347     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1348   }
1349   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1350 
1351   /* load up the local column values */
1352   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1353   cnt  = 0;
1354   for (i=0; i<mat->rmap->n; i++) {
1355     for (j=B->i[i]; j<B->i[i+1]; j++) {
1356       if (garray[B->j[j]] > cstart) break;
1357       column_values[cnt++] = B->a[j];
1358     }
1359     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1360     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1361   }
1362   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1363 
1364   /* store the column values to the file */
1365   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1366   if (!rank) {
1367     MPI_Status status;
1368     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1369     for (i=1; i<size; i++) {
1370       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1371       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1372       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1373       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1374       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1375     }
1376     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1377   } else {
1378     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1379     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1380     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1381     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1382   }
1383   ierr = PetscFree(column_values);CHKERRQ(ierr);
1384 
1385   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1386   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1387   PetscFunctionReturn(0);
1388 }
1389 
1390 #include <petscdraw.h>
1391 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1392 {
1393   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1394   PetscErrorCode    ierr;
1395   PetscMPIInt       rank = aij->rank,size = aij->size;
1396   PetscBool         isdraw,iascii,isbinary;
1397   PetscViewer       sviewer;
1398   PetscViewerFormat format;
1399 
1400   PetscFunctionBegin;
1401   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1402   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1403   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1404   if (iascii) {
1405     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1406     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1407       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1408       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1409       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1410       for (i=0; i<(PetscInt)size; i++) {
1411         nmax = PetscMax(nmax,nz[i]);
1412         nmin = PetscMin(nmin,nz[i]);
1413         navg += nz[i];
1414       }
1415       ierr = PetscFree(nz);CHKERRQ(ierr);
1416       navg = navg/size;
1417       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1418       PetscFunctionReturn(0);
1419     }
1420     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1421     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1422       MatInfo   info;
1423       PetscBool inodes;
1424 
1425       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1426       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1427       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1428       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1429       if (!inodes) {
1430         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1431                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1432       } else {
1433         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1434                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1435       }
1436       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1437       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1438       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1439       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1440       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1441       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1442       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1443       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1444       PetscFunctionReturn(0);
1445     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1446       PetscInt inodecount,inodelimit,*inodes;
1447       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1448       if (inodes) {
1449         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1450       } else {
1451         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1452       }
1453       PetscFunctionReturn(0);
1454     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1455       PetscFunctionReturn(0);
1456     }
1457   } else if (isbinary) {
1458     if (size == 1) {
1459       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1460       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1461     } else {
1462       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1463     }
1464     PetscFunctionReturn(0);
1465   } else if (iascii && size == 1) {
1466     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1467     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1468     PetscFunctionReturn(0);
1469   } else if (isdraw) {
1470     PetscDraw draw;
1471     PetscBool isnull;
1472     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1473     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1474     if (isnull) PetscFunctionReturn(0);
1475   }
1476 
1477   { /* assemble the entire matrix onto first processor */
1478     Mat A = NULL, Av;
1479     IS  isrow,iscol;
1480 
1481     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1482     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1483     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1484     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1485 /*  The commented code uses MatCreateSubMatrices instead */
1486 /*
1487     Mat *AA, A = NULL, Av;
1488     IS  isrow,iscol;
1489 
1490     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1491     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1492     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1493     if (!rank) {
1494        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1495        A    = AA[0];
1496        Av   = AA[0];
1497     }
1498     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1499 */
1500     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1501     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1502     /*
1503        Everyone has to call to draw the matrix since the graphics waits are
1504        synchronized across all processors that share the PetscDraw object
1505     */
1506     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1507     if (!rank) {
1508       if (((PetscObject)mat)->name) {
1509         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1510       }
1511       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1512     }
1513     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1514     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1515     ierr = MatDestroy(&A);CHKERRQ(ierr);
1516   }
1517   PetscFunctionReturn(0);
1518 }
1519 
1520 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1521 {
1522   PetscErrorCode ierr;
1523   PetscBool      iascii,isdraw,issocket,isbinary;
1524 
1525   PetscFunctionBegin;
1526   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1527   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1528   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1529   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1530   if (iascii || isdraw || isbinary || issocket) {
1531     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1532   }
1533   PetscFunctionReturn(0);
1534 }
1535 
1536 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1537 {
1538   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1539   PetscErrorCode ierr;
1540   Vec            bb1 = 0;
1541   PetscBool      hasop;
1542 
1543   PetscFunctionBegin;
1544   if (flag == SOR_APPLY_UPPER) {
1545     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1546     PetscFunctionReturn(0);
1547   }
1548 
1549   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1550     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1551   }
1552 
1553   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1554     if (flag & SOR_ZERO_INITIAL_GUESS) {
1555       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1556       its--;
1557     }
1558 
1559     while (its--) {
1560       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1561       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1562 
1563       /* update rhs: bb1 = bb - B*x */
1564       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1565       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1566 
1567       /* local sweep */
1568       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1569     }
1570   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1571     if (flag & SOR_ZERO_INITIAL_GUESS) {
1572       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1573       its--;
1574     }
1575     while (its--) {
1576       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1577       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1578 
1579       /* update rhs: bb1 = bb - B*x */
1580       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1581       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1582 
1583       /* local sweep */
1584       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1585     }
1586   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1587     if (flag & SOR_ZERO_INITIAL_GUESS) {
1588       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1589       its--;
1590     }
1591     while (its--) {
1592       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1593       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1594 
1595       /* update rhs: bb1 = bb - B*x */
1596       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1597       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1598 
1599       /* local sweep */
1600       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1601     }
1602   } else if (flag & SOR_EISENSTAT) {
1603     Vec xx1;
1604 
1605     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1606     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1607 
1608     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1609     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1610     if (!mat->diag) {
1611       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1612       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1613     }
1614     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1615     if (hasop) {
1616       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1617     } else {
1618       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1619     }
1620     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1621 
1622     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1623 
1624     /* local sweep */
1625     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1626     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1627     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1628   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1629 
1630   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1631 
1632   matin->factorerrortype = mat->A->factorerrortype;
1633   PetscFunctionReturn(0);
1634 }
1635 
1636 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1637 {
1638   Mat            aA,aB,Aperm;
1639   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1640   PetscScalar    *aa,*ba;
1641   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1642   PetscSF        rowsf,sf;
1643   IS             parcolp = NULL;
1644   PetscBool      done;
1645   PetscErrorCode ierr;
1646 
1647   PetscFunctionBegin;
1648   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1649   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1650   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1651   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1652 
1653   /* Invert row permutation to find out where my rows should go */
1654   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1655   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1656   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1657   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1658   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1659   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1660 
1661   /* Invert column permutation to find out where my columns should go */
1662   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1663   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1664   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1665   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1666   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1667   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1668   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1669 
1670   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1671   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1672   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1673 
1674   /* Find out where my gcols should go */
1675   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1676   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1677   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1678   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1679   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1680   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1681   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1682   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1683 
1684   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1685   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1686   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1687   for (i=0; i<m; i++) {
1688     PetscInt row = rdest[i],rowner;
1689     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1690     for (j=ai[i]; j<ai[i+1]; j++) {
1691       PetscInt cowner,col = cdest[aj[j]];
1692       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1693       if (rowner == cowner) dnnz[i]++;
1694       else onnz[i]++;
1695     }
1696     for (j=bi[i]; j<bi[i+1]; j++) {
1697       PetscInt cowner,col = gcdest[bj[j]];
1698       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1699       if (rowner == cowner) dnnz[i]++;
1700       else onnz[i]++;
1701     }
1702   }
1703   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1704   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1705   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1706   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1707   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1708 
1709   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1710   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1711   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1712   for (i=0; i<m; i++) {
1713     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1714     PetscInt j0,rowlen;
1715     rowlen = ai[i+1] - ai[i];
1716     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1717       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1718       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1719     }
1720     rowlen = bi[i+1] - bi[i];
1721     for (j0=j=0; j<rowlen; j0=j) {
1722       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1723       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1724     }
1725   }
1726   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1727   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1728   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1729   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1730   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1731   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1732   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1733   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1734   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1735   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1736   *B = Aperm;
1737   PetscFunctionReturn(0);
1738 }
1739 
1740 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1741 {
1742   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1743   PetscErrorCode ierr;
1744 
1745   PetscFunctionBegin;
1746   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1747   if (ghosts) *ghosts = aij->garray;
1748   PetscFunctionReturn(0);
1749 }
1750 
1751 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1752 {
1753   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1754   Mat            A    = mat->A,B = mat->B;
1755   PetscErrorCode ierr;
1756   PetscReal      isend[5],irecv[5];
1757 
1758   PetscFunctionBegin;
1759   info->block_size = 1.0;
1760   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1761 
1762   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1763   isend[3] = info->memory;  isend[4] = info->mallocs;
1764 
1765   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1766 
1767   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1768   isend[3] += info->memory;  isend[4] += info->mallocs;
1769   if (flag == MAT_LOCAL) {
1770     info->nz_used      = isend[0];
1771     info->nz_allocated = isend[1];
1772     info->nz_unneeded  = isend[2];
1773     info->memory       = isend[3];
1774     info->mallocs      = isend[4];
1775   } else if (flag == MAT_GLOBAL_MAX) {
1776     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1777 
1778     info->nz_used      = irecv[0];
1779     info->nz_allocated = irecv[1];
1780     info->nz_unneeded  = irecv[2];
1781     info->memory       = irecv[3];
1782     info->mallocs      = irecv[4];
1783   } else if (flag == MAT_GLOBAL_SUM) {
1784     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1785 
1786     info->nz_used      = irecv[0];
1787     info->nz_allocated = irecv[1];
1788     info->nz_unneeded  = irecv[2];
1789     info->memory       = irecv[3];
1790     info->mallocs      = irecv[4];
1791   }
1792   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1793   info->fill_ratio_needed = 0;
1794   info->factor_mallocs    = 0;
1795   PetscFunctionReturn(0);
1796 }
1797 
1798 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1799 {
1800   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1801   PetscErrorCode ierr;
1802 
1803   PetscFunctionBegin;
1804   switch (op) {
1805   case MAT_NEW_NONZERO_LOCATIONS:
1806   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1807   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1808   case MAT_KEEP_NONZERO_PATTERN:
1809   case MAT_NEW_NONZERO_LOCATION_ERR:
1810   case MAT_USE_INODES:
1811   case MAT_IGNORE_ZERO_ENTRIES:
1812     MatCheckPreallocated(A,1);
1813     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1814     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1815     break;
1816   case MAT_ROW_ORIENTED:
1817     MatCheckPreallocated(A,1);
1818     a->roworiented = flg;
1819 
1820     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1821     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1822     break;
1823   case MAT_NEW_DIAGONALS:
1824     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1825     break;
1826   case MAT_IGNORE_OFF_PROC_ENTRIES:
1827     a->donotstash = flg;
1828     break;
1829   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1830   case MAT_SPD:
1831   case MAT_SYMMETRIC:
1832   case MAT_STRUCTURALLY_SYMMETRIC:
1833   case MAT_HERMITIAN:
1834   case MAT_SYMMETRY_ETERNAL:
1835     break;
1836   case MAT_SUBMAT_SINGLEIS:
1837     A->submat_singleis = flg;
1838     break;
1839   case MAT_STRUCTURE_ONLY:
1840     /* The option is handled directly by MatSetOption() */
1841     break;
1842   default:
1843     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1844   }
1845   PetscFunctionReturn(0);
1846 }
1847 
1848 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1849 {
1850   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1851   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1852   PetscErrorCode ierr;
1853   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1854   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1855   PetscInt       *cmap,*idx_p;
1856 
1857   PetscFunctionBegin;
1858   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1859   mat->getrowactive = PETSC_TRUE;
1860 
1861   if (!mat->rowvalues && (idx || v)) {
1862     /*
1863         allocate enough space to hold information from the longest row.
1864     */
1865     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1866     PetscInt   max = 1,tmp;
1867     for (i=0; i<matin->rmap->n; i++) {
1868       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1869       if (max < tmp) max = tmp;
1870     }
1871     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1872   }
1873 
1874   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1875   lrow = row - rstart;
1876 
1877   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1878   if (!v)   {pvA = 0; pvB = 0;}
1879   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1880   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1881   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1882   nztot = nzA + nzB;
1883 
1884   cmap = mat->garray;
1885   if (v  || idx) {
1886     if (nztot) {
1887       /* Sort by increasing column numbers, assuming A and B already sorted */
1888       PetscInt imark = -1;
1889       if (v) {
1890         *v = v_p = mat->rowvalues;
1891         for (i=0; i<nzB; i++) {
1892           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1893           else break;
1894         }
1895         imark = i;
1896         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1897         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1898       }
1899       if (idx) {
1900         *idx = idx_p = mat->rowindices;
1901         if (imark > -1) {
1902           for (i=0; i<imark; i++) {
1903             idx_p[i] = cmap[cworkB[i]];
1904           }
1905         } else {
1906           for (i=0; i<nzB; i++) {
1907             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1908             else break;
1909           }
1910           imark = i;
1911         }
1912         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1913         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1914       }
1915     } else {
1916       if (idx) *idx = 0;
1917       if (v)   *v   = 0;
1918     }
1919   }
1920   *nz  = nztot;
1921   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1922   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1923   PetscFunctionReturn(0);
1924 }
1925 
1926 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1927 {
1928   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1929 
1930   PetscFunctionBegin;
1931   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1932   aij->getrowactive = PETSC_FALSE;
1933   PetscFunctionReturn(0);
1934 }
1935 
1936 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1937 {
1938   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1939   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1940   PetscErrorCode ierr;
1941   PetscInt       i,j,cstart = mat->cmap->rstart;
1942   PetscReal      sum = 0.0;
1943   MatScalar      *v;
1944 
1945   PetscFunctionBegin;
1946   if (aij->size == 1) {
1947     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1948   } else {
1949     if (type == NORM_FROBENIUS) {
1950       v = amat->a;
1951       for (i=0; i<amat->nz; i++) {
1952         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1953       }
1954       v = bmat->a;
1955       for (i=0; i<bmat->nz; i++) {
1956         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1957       }
1958       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1959       *norm = PetscSqrtReal(*norm);
1960       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1961     } else if (type == NORM_1) { /* max column norm */
1962       PetscReal *tmp,*tmp2;
1963       PetscInt  *jj,*garray = aij->garray;
1964       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1965       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1966       *norm = 0.0;
1967       v     = amat->a; jj = amat->j;
1968       for (j=0; j<amat->nz; j++) {
1969         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1970       }
1971       v = bmat->a; jj = bmat->j;
1972       for (j=0; j<bmat->nz; j++) {
1973         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1974       }
1975       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1976       for (j=0; j<mat->cmap->N; j++) {
1977         if (tmp2[j] > *norm) *norm = tmp2[j];
1978       }
1979       ierr = PetscFree(tmp);CHKERRQ(ierr);
1980       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1981       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1982     } else if (type == NORM_INFINITY) { /* max row norm */
1983       PetscReal ntemp = 0.0;
1984       for (j=0; j<aij->A->rmap->n; j++) {
1985         v   = amat->a + amat->i[j];
1986         sum = 0.0;
1987         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1988           sum += PetscAbsScalar(*v); v++;
1989         }
1990         v = bmat->a + bmat->i[j];
1991         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1992           sum += PetscAbsScalar(*v); v++;
1993         }
1994         if (sum > ntemp) ntemp = sum;
1995       }
1996       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1997       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1998     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1999   }
2000   PetscFunctionReturn(0);
2001 }
2002 
2003 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2004 {
2005   Mat_MPIAIJ     *a    =(Mat_MPIAIJ*)A->data,*b;
2006   Mat_SeqAIJ     *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
2007   PetscInt       M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,*B_diag_ilen,*B_diag_i,i,ncol,A_diag_ncol;
2008   PetscErrorCode ierr;
2009   Mat            B,A_diag,*B_diag;
2010   MatScalar      *array;
2011 
2012   PetscFunctionBegin;
2013   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2014   ai = Aloc->i; aj = Aloc->j;
2015   bi = Bloc->i; bj = Bloc->j;
2016   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2017     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2018     PetscSFNode          *oloc;
2019     PETSC_UNUSED PetscSF sf;
2020 
2021     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2022     /* compute d_nnz for preallocation */
2023     ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
2024     for (i=0; i<ai[ma]; i++) {
2025       d_nnz[aj[i]]++;
2026     }
2027     /* compute local off-diagonal contributions */
2028     ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
2029     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2030     /* map those to global */
2031     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2032     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2033     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2034     ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
2035     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2036     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2037     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2038 
2039     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2040     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2041     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2042     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2043     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2044     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2045   } else {
2046     B    = *matout;
2047     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2048   }
2049 
2050   b           = (Mat_MPIAIJ*)B->data;
2051   A_diag      = a->A;
2052   B_diag      = &b->A;
2053   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2054   A_diag_ncol = A_diag->cmap->N;
2055   B_diag_ilen = sub_B_diag->ilen;
2056   B_diag_i    = sub_B_diag->i;
2057 
2058   /* Set ilen for diagonal of B */
2059   for (i=0; i<A_diag_ncol; i++) {
2060     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2061   }
2062 
2063   /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
2064   very quickly (=without using MatSetValues), because all writes are local. */
2065   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2066 
2067   /* copy over the B part */
2068   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2069   array = Bloc->a;
2070   row   = A->rmap->rstart;
2071   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2072   cols_tmp = cols;
2073   for (i=0; i<mb; i++) {
2074     ncol = bi[i+1]-bi[i];
2075     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2076     row++;
2077     array += ncol; cols_tmp += ncol;
2078   }
2079   ierr = PetscFree(cols);CHKERRQ(ierr);
2080 
2081   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2082   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2083   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2084     *matout = B;
2085   } else {
2086     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2087   }
2088   PetscFunctionReturn(0);
2089 }
2090 
2091 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2092 {
2093   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2094   Mat            a    = aij->A,b = aij->B;
2095   PetscErrorCode ierr;
2096   PetscInt       s1,s2,s3;
2097 
2098   PetscFunctionBegin;
2099   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2100   if (rr) {
2101     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2102     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2103     /* Overlap communication with computation. */
2104     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2105   }
2106   if (ll) {
2107     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2108     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2109     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2110   }
2111   /* scale  the diagonal block */
2112   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2113 
2114   if (rr) {
2115     /* Do a scatter end and then right scale the off-diagonal block */
2116     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2117     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2118   }
2119   PetscFunctionReturn(0);
2120 }
2121 
2122 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2123 {
2124   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2125   PetscErrorCode ierr;
2126 
2127   PetscFunctionBegin;
2128   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2129   PetscFunctionReturn(0);
2130 }
2131 
2132 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2133 {
2134   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2135   Mat            a,b,c,d;
2136   PetscBool      flg;
2137   PetscErrorCode ierr;
2138 
2139   PetscFunctionBegin;
2140   a = matA->A; b = matA->B;
2141   c = matB->A; d = matB->B;
2142 
2143   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2144   if (flg) {
2145     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2146   }
2147   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2148   PetscFunctionReturn(0);
2149 }
2150 
2151 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2152 {
2153   PetscErrorCode ierr;
2154   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2155   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2156 
2157   PetscFunctionBegin;
2158   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2159   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2160     /* because of the column compression in the off-processor part of the matrix a->B,
2161        the number of columns in a->B and b->B may be different, hence we cannot call
2162        the MatCopy() directly on the two parts. If need be, we can provide a more
2163        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2164        then copying the submatrices */
2165     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2166   } else {
2167     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2168     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2169   }
2170   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2171   PetscFunctionReturn(0);
2172 }
2173 
2174 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2175 {
2176   PetscErrorCode ierr;
2177 
2178   PetscFunctionBegin;
2179   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2180   PetscFunctionReturn(0);
2181 }
2182 
2183 /*
2184    Computes the number of nonzeros per row needed for preallocation when X and Y
2185    have different nonzero structure.
2186 */
2187 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2188 {
2189   PetscInt       i,j,k,nzx,nzy;
2190 
2191   PetscFunctionBegin;
2192   /* Set the number of nonzeros in the new matrix */
2193   for (i=0; i<m; i++) {
2194     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2195     nzx = xi[i+1] - xi[i];
2196     nzy = yi[i+1] - yi[i];
2197     nnz[i] = 0;
2198     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2199       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2200       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2201       nnz[i]++;
2202     }
2203     for (; k<nzy; k++) nnz[i]++;
2204   }
2205   PetscFunctionReturn(0);
2206 }
2207 
2208 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2209 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2210 {
2211   PetscErrorCode ierr;
2212   PetscInt       m = Y->rmap->N;
2213   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2214   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2215 
2216   PetscFunctionBegin;
2217   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2218   PetscFunctionReturn(0);
2219 }
2220 
2221 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2222 {
2223   PetscErrorCode ierr;
2224   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2225   PetscBLASInt   bnz,one=1;
2226   Mat_SeqAIJ     *x,*y;
2227 
2228   PetscFunctionBegin;
2229   if (str == SAME_NONZERO_PATTERN) {
2230     PetscScalar alpha = a;
2231     x    = (Mat_SeqAIJ*)xx->A->data;
2232     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2233     y    = (Mat_SeqAIJ*)yy->A->data;
2234     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2235     x    = (Mat_SeqAIJ*)xx->B->data;
2236     y    = (Mat_SeqAIJ*)yy->B->data;
2237     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2238     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2239     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2240   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2241     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2242   } else {
2243     Mat      B;
2244     PetscInt *nnz_d,*nnz_o;
2245     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2246     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2247     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2248     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2249     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2250     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2251     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2252     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2253     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2254     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2255     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2256     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2257     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2258     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2259   }
2260   PetscFunctionReturn(0);
2261 }
2262 
2263 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2264 
2265 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2266 {
2267 #if defined(PETSC_USE_COMPLEX)
2268   PetscErrorCode ierr;
2269   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2270 
2271   PetscFunctionBegin;
2272   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2273   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2274 #else
2275   PetscFunctionBegin;
2276 #endif
2277   PetscFunctionReturn(0);
2278 }
2279 
2280 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2281 {
2282   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2283   PetscErrorCode ierr;
2284 
2285   PetscFunctionBegin;
2286   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2287   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2288   PetscFunctionReturn(0);
2289 }
2290 
2291 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2292 {
2293   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2294   PetscErrorCode ierr;
2295 
2296   PetscFunctionBegin;
2297   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2298   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2299   PetscFunctionReturn(0);
2300 }
2301 
2302 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2303 {
2304   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2305   PetscErrorCode ierr;
2306   PetscInt       i,*idxb = 0;
2307   PetscScalar    *va,*vb;
2308   Vec            vtmp;
2309 
2310   PetscFunctionBegin;
2311   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2312   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2313   if (idx) {
2314     for (i=0; i<A->rmap->n; i++) {
2315       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2316     }
2317   }
2318 
2319   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2320   if (idx) {
2321     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2322   }
2323   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2324   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2325 
2326   for (i=0; i<A->rmap->n; i++) {
2327     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2328       va[i] = vb[i];
2329       if (idx) idx[i] = a->garray[idxb[i]];
2330     }
2331   }
2332 
2333   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2334   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2335   ierr = PetscFree(idxb);CHKERRQ(ierr);
2336   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2337   PetscFunctionReturn(0);
2338 }
2339 
2340 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2341 {
2342   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2343   PetscErrorCode ierr;
2344   PetscInt       i,*idxb = 0;
2345   PetscScalar    *va,*vb;
2346   Vec            vtmp;
2347 
2348   PetscFunctionBegin;
2349   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2350   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2351   if (idx) {
2352     for (i=0; i<A->cmap->n; i++) {
2353       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2354     }
2355   }
2356 
2357   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2358   if (idx) {
2359     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2360   }
2361   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2362   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2363 
2364   for (i=0; i<A->rmap->n; i++) {
2365     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2366       va[i] = vb[i];
2367       if (idx) idx[i] = a->garray[idxb[i]];
2368     }
2369   }
2370 
2371   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2372   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2373   ierr = PetscFree(idxb);CHKERRQ(ierr);
2374   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2375   PetscFunctionReturn(0);
2376 }
2377 
2378 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2379 {
2380   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2381   PetscInt       n      = A->rmap->n;
2382   PetscInt       cstart = A->cmap->rstart;
2383   PetscInt       *cmap  = mat->garray;
2384   PetscInt       *diagIdx, *offdiagIdx;
2385   Vec            diagV, offdiagV;
2386   PetscScalar    *a, *diagA, *offdiagA;
2387   PetscInt       r;
2388   PetscErrorCode ierr;
2389 
2390   PetscFunctionBegin;
2391   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2392   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
2393   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
2394   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2395   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2396   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2397   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2398   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2399   for (r = 0; r < n; ++r) {
2400     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2401       a[r]   = diagA[r];
2402       idx[r] = cstart + diagIdx[r];
2403     } else {
2404       a[r]   = offdiagA[r];
2405       idx[r] = cmap[offdiagIdx[r]];
2406     }
2407   }
2408   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2409   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2410   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2411   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2412   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2413   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2414   PetscFunctionReturn(0);
2415 }
2416 
2417 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2418 {
2419   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2420   PetscInt       n      = A->rmap->n;
2421   PetscInt       cstart = A->cmap->rstart;
2422   PetscInt       *cmap  = mat->garray;
2423   PetscInt       *diagIdx, *offdiagIdx;
2424   Vec            diagV, offdiagV;
2425   PetscScalar    *a, *diagA, *offdiagA;
2426   PetscInt       r;
2427   PetscErrorCode ierr;
2428 
2429   PetscFunctionBegin;
2430   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2431   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2432   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2433   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2434   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2435   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2436   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2437   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2438   for (r = 0; r < n; ++r) {
2439     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2440       a[r]   = diagA[r];
2441       idx[r] = cstart + diagIdx[r];
2442     } else {
2443       a[r]   = offdiagA[r];
2444       idx[r] = cmap[offdiagIdx[r]];
2445     }
2446   }
2447   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2448   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2449   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2450   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2451   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2452   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2453   PetscFunctionReturn(0);
2454 }
2455 
2456 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2457 {
2458   PetscErrorCode ierr;
2459   Mat            *dummy;
2460 
2461   PetscFunctionBegin;
2462   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2463   *newmat = *dummy;
2464   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2465   PetscFunctionReturn(0);
2466 }
2467 
2468 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2469 {
2470   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2471   PetscErrorCode ierr;
2472 
2473   PetscFunctionBegin;
2474   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2475   A->factorerrortype = a->A->factorerrortype;
2476   PetscFunctionReturn(0);
2477 }
2478 
2479 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2480 {
2481   PetscErrorCode ierr;
2482   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2483 
2484   PetscFunctionBegin;
2485   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2486   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2487   if (x->assembled) {
2488     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2489   } else {
2490     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2491   }
2492   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2493   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2494   PetscFunctionReturn(0);
2495 }
2496 
2497 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2498 {
2499   PetscFunctionBegin;
2500   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2501   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2502   PetscFunctionReturn(0);
2503 }
2504 
2505 /*@
2506    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2507 
2508    Collective on Mat
2509 
2510    Input Parameters:
2511 +    A - the matrix
2512 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2513 
2514  Level: advanced
2515 
2516 @*/
2517 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2518 {
2519   PetscErrorCode       ierr;
2520 
2521   PetscFunctionBegin;
2522   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2523   PetscFunctionReturn(0);
2524 }
2525 
2526 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2527 {
2528   PetscErrorCode       ierr;
2529   PetscBool            sc = PETSC_FALSE,flg;
2530 
2531   PetscFunctionBegin;
2532   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2533   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2534   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2535   if (flg) {
2536     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2537   }
2538   ierr = PetscOptionsTail();CHKERRQ(ierr);
2539   PetscFunctionReturn(0);
2540 }
2541 
2542 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2543 {
2544   PetscErrorCode ierr;
2545   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2546   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2547 
2548   PetscFunctionBegin;
2549   if (!Y->preallocated) {
2550     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2551   } else if (!aij->nz) {
2552     PetscInt nonew = aij->nonew;
2553     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2554     aij->nonew = nonew;
2555   }
2556   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2557   PetscFunctionReturn(0);
2558 }
2559 
2560 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2561 {
2562   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2563   PetscErrorCode ierr;
2564 
2565   PetscFunctionBegin;
2566   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2567   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2568   if (d) {
2569     PetscInt rstart;
2570     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2571     *d += rstart;
2572 
2573   }
2574   PetscFunctionReturn(0);
2575 }
2576 
2577 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2578 {
2579   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2580   PetscErrorCode ierr;
2581 
2582   PetscFunctionBegin;
2583   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2584   PetscFunctionReturn(0);
2585 }
2586 
2587 /* -------------------------------------------------------------------*/
2588 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2589                                        MatGetRow_MPIAIJ,
2590                                        MatRestoreRow_MPIAIJ,
2591                                        MatMult_MPIAIJ,
2592                                 /* 4*/ MatMultAdd_MPIAIJ,
2593                                        MatMultTranspose_MPIAIJ,
2594                                        MatMultTransposeAdd_MPIAIJ,
2595                                        0,
2596                                        0,
2597                                        0,
2598                                 /*10*/ 0,
2599                                        0,
2600                                        0,
2601                                        MatSOR_MPIAIJ,
2602                                        MatTranspose_MPIAIJ,
2603                                 /*15*/ MatGetInfo_MPIAIJ,
2604                                        MatEqual_MPIAIJ,
2605                                        MatGetDiagonal_MPIAIJ,
2606                                        MatDiagonalScale_MPIAIJ,
2607                                        MatNorm_MPIAIJ,
2608                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2609                                        MatAssemblyEnd_MPIAIJ,
2610                                        MatSetOption_MPIAIJ,
2611                                        MatZeroEntries_MPIAIJ,
2612                                 /*24*/ MatZeroRows_MPIAIJ,
2613                                        0,
2614                                        0,
2615                                        0,
2616                                        0,
2617                                 /*29*/ MatSetUp_MPIAIJ,
2618                                        0,
2619                                        0,
2620                                        MatGetDiagonalBlock_MPIAIJ,
2621                                        0,
2622                                 /*34*/ MatDuplicate_MPIAIJ,
2623                                        0,
2624                                        0,
2625                                        0,
2626                                        0,
2627                                 /*39*/ MatAXPY_MPIAIJ,
2628                                        MatCreateSubMatrices_MPIAIJ,
2629                                        MatIncreaseOverlap_MPIAIJ,
2630                                        MatGetValues_MPIAIJ,
2631                                        MatCopy_MPIAIJ,
2632                                 /*44*/ MatGetRowMax_MPIAIJ,
2633                                        MatScale_MPIAIJ,
2634                                        MatShift_MPIAIJ,
2635                                        MatDiagonalSet_MPIAIJ,
2636                                        MatZeroRowsColumns_MPIAIJ,
2637                                 /*49*/ MatSetRandom_MPIAIJ,
2638                                        0,
2639                                        0,
2640                                        0,
2641                                        0,
2642                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2643                                        0,
2644                                        MatSetUnfactored_MPIAIJ,
2645                                        MatPermute_MPIAIJ,
2646                                        0,
2647                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2648                                        MatDestroy_MPIAIJ,
2649                                        MatView_MPIAIJ,
2650                                        0,
2651                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2652                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2653                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2654                                        0,
2655                                        0,
2656                                        0,
2657                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2658                                        MatGetRowMinAbs_MPIAIJ,
2659                                        0,
2660                                        0,
2661                                        0,
2662                                        0,
2663                                 /*75*/ MatFDColoringApply_AIJ,
2664                                        MatSetFromOptions_MPIAIJ,
2665                                        0,
2666                                        0,
2667                                        MatFindZeroDiagonals_MPIAIJ,
2668                                 /*80*/ 0,
2669                                        0,
2670                                        0,
2671                                 /*83*/ MatLoad_MPIAIJ,
2672                                        MatIsSymmetric_MPIAIJ,
2673                                        0,
2674                                        0,
2675                                        0,
2676                                        0,
2677                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2678                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2679                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2680                                        MatPtAP_MPIAIJ_MPIAIJ,
2681                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2682                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2683                                        0,
2684                                        0,
2685                                        0,
2686                                        0,
2687                                 /*99*/ 0,
2688                                        0,
2689                                        0,
2690                                        MatConjugate_MPIAIJ,
2691                                        0,
2692                                 /*104*/MatSetValuesRow_MPIAIJ,
2693                                        MatRealPart_MPIAIJ,
2694                                        MatImaginaryPart_MPIAIJ,
2695                                        0,
2696                                        0,
2697                                 /*109*/0,
2698                                        0,
2699                                        MatGetRowMin_MPIAIJ,
2700                                        0,
2701                                        MatMissingDiagonal_MPIAIJ,
2702                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2703                                        0,
2704                                        MatGetGhosts_MPIAIJ,
2705                                        0,
2706                                        0,
2707                                 /*119*/0,
2708                                        0,
2709                                        0,
2710                                        0,
2711                                        MatGetMultiProcBlock_MPIAIJ,
2712                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2713                                        MatGetColumnNorms_MPIAIJ,
2714                                        MatInvertBlockDiagonal_MPIAIJ,
2715                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2716                                        MatCreateSubMatricesMPI_MPIAIJ,
2717                                 /*129*/0,
2718                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2719                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2720                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2721                                        0,
2722                                 /*134*/0,
2723                                        0,
2724                                        MatRARt_MPIAIJ_MPIAIJ,
2725                                        0,
2726                                        0,
2727                                 /*139*/MatSetBlockSizes_MPIAIJ,
2728                                        0,
2729                                        0,
2730                                        MatFDColoringSetUp_MPIXAIJ,
2731                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2732                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2733 };
2734 
2735 /* ----------------------------------------------------------------------------------------*/
2736 
2737 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2738 {
2739   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2740   PetscErrorCode ierr;
2741 
2742   PetscFunctionBegin;
2743   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2744   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2745   PetscFunctionReturn(0);
2746 }
2747 
2748 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2749 {
2750   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2751   PetscErrorCode ierr;
2752 
2753   PetscFunctionBegin;
2754   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2755   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2756   PetscFunctionReturn(0);
2757 }
2758 
2759 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2760 {
2761   Mat_MPIAIJ     *b;
2762   PetscErrorCode ierr;
2763   PetscMPIInt    size;
2764 
2765   PetscFunctionBegin;
2766   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2767   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2768   b = (Mat_MPIAIJ*)B->data;
2769 
2770 #if defined(PETSC_USE_CTABLE)
2771   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2772 #else
2773   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2774 #endif
2775   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2776   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2777   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2778 
2779   /* Because the B will have been resized we simply destroy it and create a new one each time */
2780   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2781   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2782   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2783   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2784   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2785   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2786   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2787 
2788   if (!B->preallocated) {
2789     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2790     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2791     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2792     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2793     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2794   }
2795 
2796   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2797   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2798   B->preallocated  = PETSC_TRUE;
2799   B->was_assembled = PETSC_FALSE;
2800   B->assembled     = PETSC_FALSE;
2801   PetscFunctionReturn(0);
2802 }
2803 
2804 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2805 {
2806   Mat_MPIAIJ     *b;
2807   PetscErrorCode ierr;
2808 
2809   PetscFunctionBegin;
2810   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2811   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2812   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2813   b = (Mat_MPIAIJ*)B->data;
2814 
2815 #if defined(PETSC_USE_CTABLE)
2816   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2817 #else
2818   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2819 #endif
2820   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2821   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2822   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2823 
2824   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2825   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2826   B->preallocated  = PETSC_TRUE;
2827   B->was_assembled = PETSC_FALSE;
2828   B->assembled = PETSC_FALSE;
2829   PetscFunctionReturn(0);
2830 }
2831 
2832 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2833 {
2834   Mat            mat;
2835   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2836   PetscErrorCode ierr;
2837 
2838   PetscFunctionBegin;
2839   *newmat = 0;
2840   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2841   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2842   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2843   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2844   a       = (Mat_MPIAIJ*)mat->data;
2845 
2846   mat->factortype   = matin->factortype;
2847   mat->assembled    = PETSC_TRUE;
2848   mat->insertmode   = NOT_SET_VALUES;
2849   mat->preallocated = PETSC_TRUE;
2850 
2851   a->size         = oldmat->size;
2852   a->rank         = oldmat->rank;
2853   a->donotstash   = oldmat->donotstash;
2854   a->roworiented  = oldmat->roworiented;
2855   a->rowindices   = 0;
2856   a->rowvalues    = 0;
2857   a->getrowactive = PETSC_FALSE;
2858 
2859   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2860   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2861 
2862   if (oldmat->colmap) {
2863 #if defined(PETSC_USE_CTABLE)
2864     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2865 #else
2866     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2867     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2868     ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
2869 #endif
2870   } else a->colmap = 0;
2871   if (oldmat->garray) {
2872     PetscInt len;
2873     len  = oldmat->B->cmap->n;
2874     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2875     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2876     if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
2877   } else a->garray = 0;
2878 
2879   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2880   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2881   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2882   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2883 
2884   if (oldmat->Mvctx_mpi1) {
2885     ierr    = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2886     ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2887   }
2888 
2889   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2890   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2891   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2892   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2893   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2894   *newmat = mat;
2895   PetscFunctionReturn(0);
2896 }
2897 
2898 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2899 {
2900   PetscBool      isbinary, ishdf5;
2901   PetscErrorCode ierr;
2902 
2903   PetscFunctionBegin;
2904   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2905   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2906   /* force binary viewer to load .info file if it has not yet done so */
2907   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2908   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2909   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2910   if (isbinary) {
2911     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
2912   } else if (ishdf5) {
2913 #if defined(PETSC_HAVE_HDF5)
2914     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
2915 #else
2916     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2917 #endif
2918   } else {
2919     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2920   }
2921   PetscFunctionReturn(0);
2922 }
2923 
2924 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer)
2925 {
2926   PetscScalar    *vals,*svals;
2927   MPI_Comm       comm;
2928   PetscErrorCode ierr;
2929   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2930   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2931   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2932   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2933   PetscInt       cend,cstart,n,*rowners;
2934   int            fd;
2935   PetscInt       bs = newMat->rmap->bs;
2936 
2937   PetscFunctionBegin;
2938   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2939   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2940   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2941   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2942   if (!rank) {
2943     ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr);
2944     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2945     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ");
2946   }
2947 
2948   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2949   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2950   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2951   if (bs < 0) bs = 1;
2952 
2953   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2954   M    = header[1]; N = header[2];
2955 
2956   /* If global sizes are set, check if they are consistent with that given in the file */
2957   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2958   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2959 
2960   /* determine ownership of all (block) rows */
2961   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2962   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2963   else m = newMat->rmap->n; /* Set by user */
2964 
2965   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2966   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2967 
2968   /* First process needs enough room for process with most rows */
2969   if (!rank) {
2970     mmax = rowners[1];
2971     for (i=2; i<=size; i++) {
2972       mmax = PetscMax(mmax, rowners[i]);
2973     }
2974   } else mmax = -1;             /* unused, but compilers complain */
2975 
2976   rowners[0] = 0;
2977   for (i=2; i<=size; i++) {
2978     rowners[i] += rowners[i-1];
2979   }
2980   rstart = rowners[rank];
2981   rend   = rowners[rank+1];
2982 
2983   /* distribute row lengths to all processors */
2984   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2985   if (!rank) {
2986     ierr = PetscBinaryRead(fd,ourlens,m,NULL,PETSC_INT);CHKERRQ(ierr);
2987     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2988     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2989     for (j=0; j<m; j++) {
2990       procsnz[0] += ourlens[j];
2991     }
2992     for (i=1; i<size; i++) {
2993       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],NULL,PETSC_INT);CHKERRQ(ierr);
2994       /* calculate the number of nonzeros on each processor */
2995       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2996         procsnz[i] += rowlengths[j];
2997       }
2998       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2999     }
3000     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3001   } else {
3002     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3003   }
3004 
3005   if (!rank) {
3006     /* determine max buffer needed and allocate it */
3007     maxnz = 0;
3008     for (i=0; i<size; i++) {
3009       maxnz = PetscMax(maxnz,procsnz[i]);
3010     }
3011     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3012 
3013     /* read in my part of the matrix column indices  */
3014     nz   = procsnz[0];
3015     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3016     ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3017 
3018     /* read in every one elses and ship off */
3019     for (i=1; i<size; i++) {
3020       nz   = procsnz[i];
3021       ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3022       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3023     }
3024     ierr = PetscFree(cols);CHKERRQ(ierr);
3025   } else {
3026     /* determine buffer space needed for message */
3027     nz = 0;
3028     for (i=0; i<m; i++) {
3029       nz += ourlens[i];
3030     }
3031     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3032 
3033     /* receive message of column indices*/
3034     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3035   }
3036 
3037   /* determine column ownership if matrix is not square */
3038   if (N != M) {
3039     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3040     else n = newMat->cmap->n;
3041     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3042     cstart = cend - n;
3043   } else {
3044     cstart = rstart;
3045     cend   = rend;
3046     n      = cend - cstart;
3047   }
3048 
3049   /* loop over local rows, determining number of off diagonal entries */
3050   ierr = PetscArrayzero(offlens,m);CHKERRQ(ierr);
3051   jj   = 0;
3052   for (i=0; i<m; i++) {
3053     for (j=0; j<ourlens[i]; j++) {
3054       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3055       jj++;
3056     }
3057   }
3058 
3059   for (i=0; i<m; i++) {
3060     ourlens[i] -= offlens[i];
3061   }
3062   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3063 
3064   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3065 
3066   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3067 
3068   for (i=0; i<m; i++) {
3069     ourlens[i] += offlens[i];
3070   }
3071 
3072   if (!rank) {
3073     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3074 
3075     /* read in my part of the matrix numerical values  */
3076     nz   = procsnz[0];
3077     ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3078 
3079     /* insert into matrix */
3080     jj      = rstart;
3081     smycols = mycols;
3082     svals   = vals;
3083     for (i=0; i<m; i++) {
3084       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3085       smycols += ourlens[i];
3086       svals   += ourlens[i];
3087       jj++;
3088     }
3089 
3090     /* read in other processors and ship out */
3091     for (i=1; i<size; i++) {
3092       nz   = procsnz[i];
3093       ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3094       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3095     }
3096     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3097   } else {
3098     /* receive numeric values */
3099     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3100 
3101     /* receive message of values*/
3102     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3103 
3104     /* insert into matrix */
3105     jj      = rstart;
3106     smycols = mycols;
3107     svals   = vals;
3108     for (i=0; i<m; i++) {
3109       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3110       smycols += ourlens[i];
3111       svals   += ourlens[i];
3112       jj++;
3113     }
3114   }
3115   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3116   ierr = PetscFree(vals);CHKERRQ(ierr);
3117   ierr = PetscFree(mycols);CHKERRQ(ierr);
3118   ierr = PetscFree(rowners);CHKERRQ(ierr);
3119   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3120   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3121   PetscFunctionReturn(0);
3122 }
3123 
3124 /* Not scalable because of ISAllGather() unless getting all columns. */
3125 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3126 {
3127   PetscErrorCode ierr;
3128   IS             iscol_local;
3129   PetscBool      isstride;
3130   PetscMPIInt    lisstride=0,gisstride;
3131 
3132   PetscFunctionBegin;
3133   /* check if we are grabbing all columns*/
3134   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3135 
3136   if (isstride) {
3137     PetscInt  start,len,mstart,mlen;
3138     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3139     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3140     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3141     if (mstart == start && mlen-mstart == len) lisstride = 1;
3142   }
3143 
3144   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3145   if (gisstride) {
3146     PetscInt N;
3147     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3148     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3149     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3150     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3151   } else {
3152     PetscInt cbs;
3153     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3154     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3155     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3156   }
3157 
3158   *isseq = iscol_local;
3159   PetscFunctionReturn(0);
3160 }
3161 
3162 /*
3163  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3164  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3165 
3166  Input Parameters:
3167    mat - matrix
3168    isrow - parallel row index set; its local indices are a subset of local columns of mat,
3169            i.e., mat->rstart <= isrow[i] < mat->rend
3170    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3171            i.e., mat->cstart <= iscol[i] < mat->cend
3172  Output Parameter:
3173    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3174    iscol_o - sequential column index set for retrieving mat->B
3175    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3176  */
3177 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3178 {
3179   PetscErrorCode ierr;
3180   Vec            x,cmap;
3181   const PetscInt *is_idx;
3182   PetscScalar    *xarray,*cmaparray;
3183   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3184   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3185   Mat            B=a->B;
3186   Vec            lvec=a->lvec,lcmap;
3187   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3188   MPI_Comm       comm;
3189   VecScatter     Mvctx=a->Mvctx;
3190 
3191   PetscFunctionBegin;
3192   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3193   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3194 
3195   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3196   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3197   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3198   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3199   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3200 
3201   /* Get start indices */
3202   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3203   isstart -= ncols;
3204   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3205 
3206   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3207   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3208   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3209   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3210   for (i=0; i<ncols; i++) {
3211     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3212     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3213     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3214   }
3215   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3216   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3217   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3218 
3219   /* Get iscol_d */
3220   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3221   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3222   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3223 
3224   /* Get isrow_d */
3225   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3226   rstart = mat->rmap->rstart;
3227   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3228   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3229   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3230   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3231 
3232   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3233   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3234   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3235 
3236   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3237   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3238   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3239 
3240   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3241 
3242   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3243   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3244 
3245   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3246   /* off-process column indices */
3247   count = 0;
3248   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3249   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3250 
3251   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3252   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3253   for (i=0; i<Bn; i++) {
3254     if (PetscRealPart(xarray[i]) > -1.0) {
3255       idx[count]     = i;                   /* local column index in off-diagonal part B */
3256       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3257       count++;
3258     }
3259   }
3260   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3261   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3262 
3263   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3264   /* cannot ensure iscol_o has same blocksize as iscol! */
3265 
3266   ierr = PetscFree(idx);CHKERRQ(ierr);
3267   *garray = cmap1;
3268 
3269   ierr = VecDestroy(&x);CHKERRQ(ierr);
3270   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3271   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3272   PetscFunctionReturn(0);
3273 }
3274 
3275 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3276 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3277 {
3278   PetscErrorCode ierr;
3279   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3280   Mat            M = NULL;
3281   MPI_Comm       comm;
3282   IS             iscol_d,isrow_d,iscol_o;
3283   Mat            Asub = NULL,Bsub = NULL;
3284   PetscInt       n;
3285 
3286   PetscFunctionBegin;
3287   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3288 
3289   if (call == MAT_REUSE_MATRIX) {
3290     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3291     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3292     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3293 
3294     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3295     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3296 
3297     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3298     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3299 
3300     /* Update diagonal and off-diagonal portions of submat */
3301     asub = (Mat_MPIAIJ*)(*submat)->data;
3302     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3303     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3304     if (n) {
3305       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3306     }
3307     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3308     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3309 
3310   } else { /* call == MAT_INITIAL_MATRIX) */
3311     const PetscInt *garray;
3312     PetscInt        BsubN;
3313 
3314     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3315     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3316 
3317     /* Create local submatrices Asub and Bsub */
3318     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3319     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3320 
3321     /* Create submatrix M */
3322     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3323 
3324     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3325     asub = (Mat_MPIAIJ*)M->data;
3326 
3327     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3328     n = asub->B->cmap->N;
3329     if (BsubN > n) {
3330       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3331       const PetscInt *idx;
3332       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3333       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3334 
3335       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3336       j = 0;
3337       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3338       for (i=0; i<n; i++) {
3339         if (j >= BsubN) break;
3340         while (subgarray[i] > garray[j]) j++;
3341 
3342         if (subgarray[i] == garray[j]) {
3343           idx_new[i] = idx[j++];
3344         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]);
3345       }
3346       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3347 
3348       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3349       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3350 
3351     } else if (BsubN < n) {
3352       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N);
3353     }
3354 
3355     ierr = PetscFree(garray);CHKERRQ(ierr);
3356     *submat = M;
3357 
3358     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3359     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3360     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3361 
3362     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3363     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3364 
3365     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3366     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3367   }
3368   PetscFunctionReturn(0);
3369 }
3370 
3371 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3372 {
3373   PetscErrorCode ierr;
3374   IS             iscol_local=NULL,isrow_d;
3375   PetscInt       csize;
3376   PetscInt       n,i,j,start,end;
3377   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3378   MPI_Comm       comm;
3379 
3380   PetscFunctionBegin;
3381   /* If isrow has same processor distribution as mat,
3382      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3383   if (call == MAT_REUSE_MATRIX) {
3384     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3385     if (isrow_d) {
3386       sameRowDist  = PETSC_TRUE;
3387       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3388     } else {
3389       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3390       if (iscol_local) {
3391         sameRowDist  = PETSC_TRUE;
3392         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3393       }
3394     }
3395   } else {
3396     /* Check if isrow has same processor distribution as mat */
3397     sameDist[0] = PETSC_FALSE;
3398     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3399     if (!n) {
3400       sameDist[0] = PETSC_TRUE;
3401     } else {
3402       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3403       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3404       if (i >= start && j < end) {
3405         sameDist[0] = PETSC_TRUE;
3406       }
3407     }
3408 
3409     /* Check if iscol has same processor distribution as mat */
3410     sameDist[1] = PETSC_FALSE;
3411     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3412     if (!n) {
3413       sameDist[1] = PETSC_TRUE;
3414     } else {
3415       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3416       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3417       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3418     }
3419 
3420     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3421     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3422     sameRowDist = tsameDist[0];
3423   }
3424 
3425   if (sameRowDist) {
3426     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3427       /* isrow and iscol have same processor distribution as mat */
3428       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3429       PetscFunctionReturn(0);
3430     } else { /* sameRowDist */
3431       /* isrow has same processor distribution as mat */
3432       if (call == MAT_INITIAL_MATRIX) {
3433         PetscBool sorted;
3434         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3435         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3436         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3437         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i);
3438 
3439         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3440         if (sorted) {
3441           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3442           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3443           PetscFunctionReturn(0);
3444         }
3445       } else { /* call == MAT_REUSE_MATRIX */
3446         IS    iscol_sub;
3447         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3448         if (iscol_sub) {
3449           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3450           PetscFunctionReturn(0);
3451         }
3452       }
3453     }
3454   }
3455 
3456   /* General case: iscol -> iscol_local which has global size of iscol */
3457   if (call == MAT_REUSE_MATRIX) {
3458     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3459     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3460   } else {
3461     if (!iscol_local) {
3462       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3463     }
3464   }
3465 
3466   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3467   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3468 
3469   if (call == MAT_INITIAL_MATRIX) {
3470     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3471     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3472   }
3473   PetscFunctionReturn(0);
3474 }
3475 
3476 /*@C
3477      MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3478          and "off-diagonal" part of the matrix in CSR format.
3479 
3480    Collective
3481 
3482    Input Parameters:
3483 +  comm - MPI communicator
3484 .  A - "diagonal" portion of matrix
3485 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3486 -  garray - global index of B columns
3487 
3488    Output Parameter:
3489 .   mat - the matrix, with input A as its local diagonal matrix
3490    Level: advanced
3491 
3492    Notes:
3493        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3494        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3495 
3496 .seealso: MatCreateMPIAIJWithSplitArrays()
3497 @*/
3498 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3499 {
3500   PetscErrorCode ierr;
3501   Mat_MPIAIJ     *maij;
3502   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3503   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3504   PetscScalar    *oa=b->a;
3505   Mat            Bnew;
3506   PetscInt       m,n,N;
3507 
3508   PetscFunctionBegin;
3509   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3510   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3511   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3512   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3513   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3514   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3515 
3516   /* Get global columns of mat */
3517   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3518 
3519   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3520   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3521   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3522   maij = (Mat_MPIAIJ*)(*mat)->data;
3523 
3524   (*mat)->preallocated = PETSC_TRUE;
3525 
3526   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3527   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3528 
3529   /* Set A as diagonal portion of *mat */
3530   maij->A = A;
3531 
3532   nz = oi[m];
3533   for (i=0; i<nz; i++) {
3534     col   = oj[i];
3535     oj[i] = garray[col];
3536   }
3537 
3538    /* Set Bnew as off-diagonal portion of *mat */
3539   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3540   bnew        = (Mat_SeqAIJ*)Bnew->data;
3541   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3542   maij->B     = Bnew;
3543 
3544   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N);
3545 
3546   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3547   b->free_a       = PETSC_FALSE;
3548   b->free_ij      = PETSC_FALSE;
3549   ierr = MatDestroy(&B);CHKERRQ(ierr);
3550 
3551   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3552   bnew->free_a       = PETSC_TRUE;
3553   bnew->free_ij      = PETSC_TRUE;
3554 
3555   /* condense columns of maij->B */
3556   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3557   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3558   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3559   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3560   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3561   PetscFunctionReturn(0);
3562 }
3563 
3564 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3565 
/*
   MatCreateSubMatrix_MPIAIJ_SameRowDist - fills the parallel matrix *newmat with the entries of mat selected
   by isrow and iscol.

   Outline (the numbered steps match the comments in the body):
     (1)-(2) MAT_INITIAL_MATRIX only: from iscol_local build iscol_sub, the entries that lie in
             [mat->cmap->rstart, mat->cmap->rend) or that match an entry of a->garray, and iscmap, the position of
             each kept entry inside iscol_local.
     (3)     Extract the sequential matrix Msub with MatCreateSubMatrices_MPIAIJ_SingleIS_Local().
     (4)     MAT_INITIAL_MATRIX only: create and preallocate the parallel matrix M.
     (5)     Insert the rows of Msub into M, translating column indices through iscmap, and assemble.

   With MAT_INITIAL_MATRIX the objects Msub, iscol_sub, iscmap and iscol_local are composed on *newmat under the
   names "SubMatrix", "SubIScol", "Subcmap" and "ISAllGather", and the local references are destroyed.
   With MAT_REUSE_MATRIX the first three are queried back from *newmat and an error is raised if any is missing.
*/
3566 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3567 {
3568   PetscErrorCode ierr;
3569   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3570   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3571   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3572   Mat            M,Msub,B=a->B;
3573   MatScalar      *aa;
3574   Mat_SeqAIJ     *aij;
3575   PetscInt       *garray = a->garray,*colsub,Ncols;
3576   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3577   IS             iscol_sub,iscmap;
3578   const PetscInt *is_idx,*cmap;
3579   PetscBool      allcolumns=PETSC_FALSE;
3580   MPI_Comm       comm;
3581 
3582   PetscFunctionBegin;
3583   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3584 
3585   if (call == MAT_REUSE_MATRIX) {
    /* Recover the helper objects that the MAT_INITIAL_MATRIX call attached to *newmat */
3586     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3587     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3588     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3589 
3590     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3591     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3592 
3593     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3594     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3595 
3596     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3597 
3598   } else { /* call == MAT_INITIAL_MATRIX) */
3599     PetscBool flg;
3600 
3601     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3602     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3603 
3604     /* (1) iscol -> nonscalable iscol_local */
3605     /* Check for special case: each processor gets entire matrix columns */
3606     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3607     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3608     if (allcolumns) {
      /* iscol_sub shares iscol_local (extra reference taken here); the column map is the identity 0..n-1 */
3609       iscol_sub = iscol_local;
3610       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3611       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3612 
3613     } else {
3614       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3615       PetscInt *idx,*cmap1,k;
3616       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3617       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3618       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
      /* count = number of kept columns; k = cursor into garray[], it only ever moves forward */
3619       count = 0;
3620       k     = 0;
3621       for (i=0; i<Ncols; i++) {
3622         j = is_idx[i];
3623         if (j >= cstart && j < cend) {
3624           /* diagonal part of mat */
3625           idx[count]     = j;
3626           cmap1[count++] = i; /* column index in submat */
3627         } else if (Bn) {
3628           /* off-diagonal part of mat */
3629           if (j == garray[k]) {
3630             idx[count]     = j;
3631             cmap1[count++] = i;  /* column index in submat */
3632           } else if (j > garray[k]) {
            /* advance the cursor, stopping at the last entry (index Bn-1) so garray[k] stays in bounds */
3633             while (j > garray[k] && k < Bn-1) k++;
3634             if (j == garray[k]) {
3635               idx[count]     = j;
3636               cmap1[count++] = i; /* column index in submat */
3637             }
3638           }
3639         }
3640       }
3641       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3642 
      /* PETSC_OWN_POINTER: idx and cmap1 now belong to the index sets and are not freed here */
3643       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3644       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3645       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3646 
3647       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3648     }
3649 
3650     /* (3) Create sequential Msub */
3651     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3652   }
3653 
  /* From here on both paths have Msub, iscol_sub and iscmap; cmap[] stays checked out until after step (5) */
3654   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3655   aij  = (Mat_SeqAIJ*)(Msub)->data;
3656   ii   = aij->i;
3657   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3658 
3659   /*
3660       m - number of local rows
3661       Ncols - number of columns (same on all processors)
3662       rstart - first row in new global matrix generated
3663   */
3664   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3665 
3666   if (call == MAT_INITIAL_MATRIX) {
3667     /* (4) Create parallel newmat */
3668     PetscMPIInt    rank,size;
3669     PetscInt       csize;
3670 
3671     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3672     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3673 
3674     /*
3675         Determine the number of non-zeros in the diagonal and off-diagonal
3676         portions of the matrix in order to do correct preallocation
3677     */
3678 
3679     /* first get start and end of "diagonal" columns */
3680     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3681     if (csize == PETSC_DECIDE) {
3682       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3683       if (mglobal == Ncols) { /* square matrix */
3684         nlocal = m;
3685       } else {
3686         nlocal = Ncols/size + ((Ncols % size) > rank);
3687       }
3688     } else {
3689       nlocal = csize;
3690     }
    /* Prefix sum of nlocal over the ranks: [rstart,rend) is used below as this rank's "diagonal" column range of the new matrix */
3691     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3692     rstart = rend - nlocal;
3693     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3694 
3695     /* next, compute all the lengths */
3696     jj    = aij->j;
    /* one allocation serves both arrays: olens points at the second half of dlens */
3697     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3698     olens = dlens + m;
3699     for (i=0; i<m; i++) {
3700       jend = ii[i+1] - ii[i];
3701       olen = 0;
3702       dlen = 0;
3703       for (j=0; j<jend; j++) {
        /* classify by the column index in the new matrix (cmap[]), not by the local column index of Msub */
3704         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3705         else dlen++;
3706         jj++;
3707       }
3708       olens[i] = olen;
3709       dlens[i] = dlen;
3710     }
3711 
3712     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3713     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3714 
3715     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3716     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3717     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3718     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3719     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3720     ierr = PetscFree(dlens);CHKERRQ(ierr);
3721 
3722   } else { /* call == MAT_REUSE_MATRIX */
3723     M    = *newmat;
3724     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3725     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3726     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3727     /*
3728          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3729        rather than the slower MatSetValues().
3730     */
3731     M->was_assembled = PETSC_TRUE;
3732     M->assembled     = PETSC_FALSE;
3733   }
3734 
3735   /* (5) Set values of Msub to *newmat */
  /* colsub is scratch space for one row's translated column indices; it has count (local size of iscol_sub) entries */
3736   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3737   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3738 
3739   jj   = aij->j;
3740   aa   = aij->a;
3741   for (i=0; i<m; i++) {
3742     row = rstart + i;
3743     nz  = ii[i+1] - ii[i];
3744     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3745     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3746     jj += nz; aa += nz;
3747   }
3748   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3749 
3750   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3751   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3752 
3753   ierr = PetscFree(colsub);CHKERRQ(ierr);
3754 
3755   /* save Msub, iscol_sub and iscmap used in processor for next request */
3756   if (call ==  MAT_INITIAL_MATRIX) {
    /* each object is composed on *newmat and then the local reference is destroyed */
3757     *newmat = M;
3758     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3759     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3760 
3761     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3762     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3763 
3764     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3765     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3766 
3767     if (iscol_local) {
      /* NOTE(review): this also destroys the caller's reference to iscol_local - confirm callers expect that */
3768       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3769       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3770     }
3771   }
3772   PetscFunctionReturn(0);
3773 }
3774 
3775 /*
3776     Not great since it makes two copies of the submatrix: first a local SeqAIJ
3777   matrix, and then the end result obtained by concatenating the local matrices.
3778   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3779 
3780   Note: This requires a sequential iscol with all indices.
3781 */
3782 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3783 {
3784   PetscErrorCode ierr;
3785   PetscMPIInt    rank,size;
3786   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3787   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3788   Mat            M,Mreuse;
3789   MatScalar      *aa,*vwork;
3790   MPI_Comm       comm;
3791   Mat_SeqAIJ     *aij;
3792   PetscBool      colflag,allcolumns=PETSC_FALSE;
3793 
3794   PetscFunctionBegin;
3795   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3796   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3797   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3798 
3799   /* Check for special case: each processor gets entire matrix columns */
3800   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3801   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3802   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3803 
3804   if (call ==  MAT_REUSE_MATRIX) {
3805     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3806     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3807     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3808   } else {
3809     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3810   }
3811 
3812   /*
3813       m - number of local rows
3814       n - number of columns (same on all processors)
3815       rstart - first row in new global matrix generated
3816   */
3817   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3818   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3819   if (call == MAT_INITIAL_MATRIX) {
3820     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3821     ii  = aij->i;
3822     jj  = aij->j;
3823 
3824     /*
3825         Determine the number of non-zeros in the diagonal and off-diagonal
3826         portions of the matrix in order to do correct preallocation
3827     */
3828 
3829     /* first get start and end of "diagonal" columns */
3830     if (csize == PETSC_DECIDE) {
3831       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3832       if (mglobal == n) { /* square matrix */
3833         nlocal = m;
3834       } else {
3835         nlocal = n/size + ((n % size) > rank);
3836       }
3837     } else {
3838       nlocal = csize;
3839     }
3840     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3841     rstart = rend - nlocal;
3842     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3843 
3844     /* next, compute all the lengths */
3845     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3846     olens = dlens + m;
3847     for (i=0; i<m; i++) {
3848       jend = ii[i+1] - ii[i];
3849       olen = 0;
3850       dlen = 0;
3851       for (j=0; j<jend; j++) {
3852         if (*jj < rstart || *jj >= rend) olen++;
3853         else dlen++;
3854         jj++;
3855       }
3856       olens[i] = olen;
3857       dlens[i] = dlen;
3858     }
3859     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3860     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3861     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3862     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3863     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3864     ierr = PetscFree(dlens);CHKERRQ(ierr);
3865   } else {
3866     PetscInt ml,nl;
3867 
3868     M    = *newmat;
3869     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3870     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3871     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3872     /*
3873          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3874        rather than the slower MatSetValues().
3875     */
3876     M->was_assembled = PETSC_TRUE;
3877     M->assembled     = PETSC_FALSE;
3878   }
3879   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3880   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3881   ii   = aij->i;
3882   jj   = aij->j;
3883   aa   = aij->a;
3884   for (i=0; i<m; i++) {
3885     row   = rstart + i;
3886     nz    = ii[i+1] - ii[i];
3887     cwork = jj;     jj += nz;
3888     vwork = aa;     aa += nz;
3889     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3890   }
3891 
3892   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3893   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3894   *newmat = M;
3895 
3896   /* save submatrix used in processor for next request */
3897   if (call ==  MAT_INITIAL_MATRIX) {
3898     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3899     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3900   }
3901   PetscFunctionReturn(0);
3902 }
3903 
3904 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3905 {
3906   PetscInt       m,cstart, cend,j,nnz,i,d;
3907   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3908   const PetscInt *JJ;
3909   PetscScalar    *values;
3910   PetscErrorCode ierr;
3911   PetscBool      nooffprocentries;
3912 
3913   PetscFunctionBegin;
3914   if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3915 
3916   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3917   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3918   m      = B->rmap->n;
3919   cstart = B->cmap->rstart;
3920   cend   = B->cmap->rend;
3921   rstart = B->rmap->rstart;
3922 
3923   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3924 
3925 #if defined(PETSC_USE_DEBUG)
3926   for (i=0; i<m && Ii; i++) {
3927     nnz = Ii[i+1]- Ii[i];
3928     JJ  = J + Ii[i];
3929     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3930     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]);
3931     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3932   }
3933 #endif
3934 
3935   for (i=0; i<m && Ii; i++) {
3936     nnz     = Ii[i+1]- Ii[i];
3937     JJ      = J + Ii[i];
3938     nnz_max = PetscMax(nnz_max,nnz);
3939     d       = 0;
3940     for (j=0; j<nnz; j++) {
3941       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3942     }
3943     d_nnz[i] = d;
3944     o_nnz[i] = nnz - d;
3945   }
3946   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3947   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3948 
3949   if (v) values = (PetscScalar*)v;
3950   else {
3951     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3952   }
3953 
3954   for (i=0; i<m && Ii; i++) {
3955     ii   = i + rstart;
3956     nnz  = Ii[i+1]- Ii[i];
3957     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3958   }
3959   nooffprocentries    = B->nooffprocentries;
3960   B->nooffprocentries = PETSC_TRUE;
3961   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3962   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3963   B->nooffprocentries = nooffprocentries;
3964 
3965   if (!v) {
3966     ierr = PetscFree(values);CHKERRQ(ierr);
3967   }
3968   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3969   PetscFunctionReturn(0);
3970 }
3971 
3972 /*@
3973    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3974    (the default parallel PETSc format).
3975 
3976    Collective
3977 
3978    Input Parameters:
3979 +  B - the matrix
3980 .  i - the indices into j for the start of each local row (starts with zero)
3981 .  j - the column indices for each local row (starts with zero)
3982 -  v - optional values in the matrix
3983 
3984    Level: developer
3985 
3986    Notes:
3987        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3988      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3989      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3990 
3991        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3992 
3993        The format used for the sparse matrix input is equivalent to a
3994     row-major ordering, i.e., for the following matrix, the input data expected is
3995     as shown
3996 
3997 $        1 0 0
3998 $        2 0 3     P0
3999 $       -------
4000 $        4 5 6     P1
4001 $
4002 $     Process0 [P0]: rows_owned=[0,1]
4003 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4004 $        j =  {0,0,2}  [size = 3]
4005 $        v =  {1,2,3}  [size = 3]
4006 $
4007 $     Process1 [P1]: rows_owned=[2]
4008 $        i =  {0,3}    [size = nrow+1  = 1+1]
4009 $        j =  {0,1,2}  [size = 3]
4010 $        v =  {4,5,6}  [size = 3]
4011 
4012 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
4013           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
4014 @*/
4015 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
4016 {
4017   PetscErrorCode ierr;
4018 
4019   PetscFunctionBegin;
4020   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
4021   PetscFunctionReturn(0);
4022 }
4023 
4024 /*@C
4025    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4026    (the default parallel PETSc format).  For good matrix assembly performance
4027    the user should preallocate the matrix storage by setting the parameters
4028    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4029    performance can be increased by more than a factor of 50.
4030 
4031    Collective
4032 
4033    Input Parameters:
4034 +  B - the matrix
4035 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4036            (same value is used for all local rows)
4037 .  d_nnz - array containing the number of nonzeros in the various rows of the
4038            DIAGONAL portion of the local submatrix (possibly different for each row)
4039            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4040            The size of this array is equal to the number of local rows, i.e 'm'.
4041            For matrices that will be factored, you must leave room for (and set)
4042            the diagonal entry even if it is zero.
4043 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4044            submatrix (same value is used for all local rows).
4045 -  o_nnz - array containing the number of nonzeros in the various rows of the
4046            OFF-DIAGONAL portion of the local submatrix (possibly different for
4047            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4048            structure. The size of this array is equal to the number
4049            of local rows, i.e 'm'.
4050 
4051    If the *_nnz parameter is given then the *_nz parameter is ignored
4052 
4053    The AIJ format (also called the Yale sparse matrix format or
4054    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4055    storage.  The stored row and column indices begin with zero.
4056    See Users-Manual: ch_mat for details.
4057 
4058    The parallel matrix is partitioned such that the first m0 rows belong to
4059    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4060    to process 2, etc., where m0,m1,m2,... are the values of the input parameter 'm' on each process.
4061 
4062    The DIAGONAL portion of the local submatrix of a processor can be defined
4063    as the submatrix which is obtained by extracting the part corresponding to
4064    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4065    first row that belongs to the processor, r2 is the last row belonging to
4066    this processor, and c1-c2 is the range of indices of the local part of a
4067    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4068    common case of a square matrix, the row and column ranges are the same and
4069    the DIAGONAL part is also square. The remaining portion of the local
4070    submatrix (mxN) constitute the OFF-DIAGONAL portion.
4071 
4072    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4073 
4074    You can call MatGetInfo() to get information on how effective the preallocation was;
4075    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4076    You can also run with the option -info and look for messages with the string
4077    malloc in them to see if additional memory allocation was needed.
4078 
4079    Example usage:
4080 
4081    Consider the following 8x8 matrix with 34 non-zero values, that is
4082    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4083    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4084    as follows:
4085 
4086 .vb
4087             1  2  0  |  0  3  0  |  0  4
4088     Proc0   0  5  6  |  7  0  0  |  8  0
4089             9  0 10  | 11  0  0  | 12  0
4090     -------------------------------------
4091            13  0 14  | 15 16 17  |  0  0
4092     Proc1   0 18  0  | 19 20 21  |  0  0
4093             0  0  0  | 22 23  0  | 24  0
4094     -------------------------------------
4095     Proc2  25 26 27  |  0  0 28  | 29  0
4096            30  0  0  | 31 32 33  |  0 34
4097 .ve
4098 
4099    This can be represented as a collection of submatrices as:
4100 
4101 .vb
4102       A B C
4103       D E F
4104       G H I
4105 .ve
4106 
4107    Where the submatrices A,B,C are owned by proc0, D,E,F are
4108    owned by proc1, G,H,I are owned by proc2.
4109 
4110    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4111    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4112    The 'M','N' parameters are 8,8, and have the same values on all procs.
4113 
4114    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4115    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4116    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4117    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4118    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4119    matrix, and [DF] as another SeqAIJ matrix.
4120 
4121    When d_nz, o_nz parameters are specified, d_nz storage elements are
4122    allocated for every row of the local diagonal submatrix, and o_nz
4123    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4124    One way to choose d_nz and o_nz is to use the max nonzeros per local
4125    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4126    In this case, the values of d_nz,o_nz are:
4127 .vb
4128      proc0 : d_nz = 2, o_nz = 2
4129      proc1 : d_nz = 3, o_nz = 2
4130      proc2 : d_nz = 1, o_nz = 4
4131 .ve
4132    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4133    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4134    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4135    34 values.
4136 
4137    When d_nnz, o_nnz parameters are specified, the storage is specified
4138    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4139    In the above case the values for d_nnz,o_nnz are:
4140 .vb
4141      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4142      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4143      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4144 .ve
4145    Here the space allocated is sum of all the above values i.e 34, and
4146    hence pre-allocation is perfect.
4147 
4148    Level: intermediate
4149 
4150 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4151           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4152 @*/
4153 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4154 {
4155   PetscErrorCode ierr;
4156 
4157   PetscFunctionBegin;
4158   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4159   PetscValidType(B,1);
4160   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4161   PetscFunctionReturn(0);
4162 }
4163 
4164 /*@
4165      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4166          CSR format the local rows.
4167 
4168    Collective
4169 
4170    Input Parameters:
4171 +  comm - MPI communicator
4172 .  m - number of local rows (Cannot be PETSC_DECIDE)
4173 .  n - This value should be the same as the local size used in creating the
4174        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4175        calculated if N is given) For square matrices n is almost always m.
4176 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4177 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4178 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4179 .   j - column indices
4180 -   a - matrix values
4181 
4182    Output Parameter:
4183 .   mat - the matrix
4184 
4185    Level: intermediate
4186 
4187    Notes:
4188        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4189      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4190      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4191 
4192        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4193 
4194        The format used for the sparse matrix input is equivalent to a
4195     row-major ordering, i.e., for the following matrix, the input data expected is
4196     as shown
4197 
4198 $        1 0 0
4199 $        2 0 3     P0
4200 $       -------
4201 $        4 5 6     P1
4202 $
4203 $     Process0 [P0]: rows_owned=[0,1]
4204 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4205 $        j =  {0,0,2}  [size = 3]
4206 $        v =  {1,2,3}  [size = 3]
4207 $
4208 $     Process1 [P1]: rows_owned=[2]
4209 $        i =  {0,3}    [size = nrow+1  = 1+1]
4210 $        j =  {0,1,2}  [size = 3]
4211 $        v =  {4,5,6}  [size = 3]
4212 
4213 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4214           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4215 @*/
4216 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4217 {
4218   PetscErrorCode ierr;
4219 
4220   PetscFunctionBegin;
4221   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4222   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4223   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4224   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4225   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4226   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4227   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4228   PetscFunctionReturn(0);
4229 }
4230 
4231 /*@C
4232    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4233    (the default parallel PETSc format).  For good matrix assembly performance
4234    the user should preallocate the matrix storage by setting the parameters
4235    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4236    performance can be increased by more than a factor of 50.
4237 
4238    Collective
4239 
4240    Input Parameters:
4241 +  comm - MPI communicator
4242 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4243            This value should be the same as the local size used in creating the
4244            y vector for the matrix-vector product y = Ax.
4245 .  n - This value should be the same as the local size used in creating the
4246        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4247        calculated if N is given) For square matrices n is almost always m.
4248 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4249 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4250 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4251            (same value is used for all local rows)
4252 .  d_nnz - array containing the number of nonzeros in the various rows of the
4253            DIAGONAL portion of the local submatrix (possibly different for each row)
4254            or NULL, if d_nz is used to specify the nonzero structure.
4255            The size of this array is equal to the number of local rows, i.e 'm'.
4256 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4257            submatrix (same value is used for all local rows).
4258 -  o_nnz - array containing the number of nonzeros in the various rows of the
4259            OFF-DIAGONAL portion of the local submatrix (possibly different for
4260            each row) or NULL, if o_nz is used to specify the nonzero
4261            structure. The size of this array is equal to the number
4262            of local rows, i.e 'm'.
4263 
4264    Output Parameter:
4265 .  A - the matrix
4266 
4267    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4268    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4269    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4270 
4271    Notes:
4272    If the *_nnz parameter is given then the *_nz parameter is ignored
4273 
4274    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4275    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4276    storage requirements for this matrix.
4277 
4278    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4279    processor then it must be used on all processors that share the object for
4280    that argument.
4281 
4282    The user MUST specify either the local or global matrix dimensions
4283    (possibly both).
4284 
4285    The parallel matrix is partitioned across processors such that the
4286    first m0 rows belong to process 0, the next m1 rows belong to
4287    process 1, the next m2 rows belong to process 2 etc.. where
4288    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4289    values corresponding to [m x N] submatrix.
4290 
4291    The columns are logically partitioned with the n0 columns belonging
4292    to 0th partition, the next n1 columns belonging to the next
4293    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4294 
4295    The DIAGONAL portion of the local submatrix on any given processor
4296    is the submatrix corresponding to the rows and columns m,n
4297    corresponding to the given processor. i.e diagonal matrix on
4298    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4299    etc. The remaining portion of the local submatrix [m x (N-n)]
4300    constitute the OFF-DIAGONAL portion. The example below better
4301    illustrates this concept.
4302 
4303    For a square global matrix we define each processor's diagonal portion
4304    to be its local rows and the corresponding columns (a square submatrix);
4305    each processor's off-diagonal portion encompasses the remainder of the
4306    local matrix (a rectangular submatrix).
4307 
4308    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4309 
4310    When calling this routine with a single process communicator, a matrix of
4311    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4312    type of communicator, use the construction mechanism
4313 .vb
4314      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4315 .ve
4316 
4317 $     MatCreate(...,&A);
4318 $     MatSetType(A,MATMPIAIJ);
4319 $     MatSetSizes(A, m,n,M,N);
4320 $     MatMPIAIJSetPreallocation(A,...);
4321 
4322    By default, this format uses inodes (identical nodes) when possible.
4323    We search for consecutive rows with the same nonzero structure, thereby
4324    reusing matrix information to achieve increased efficiency.
4325 
4326    Options Database Keys:
4327 +  -mat_no_inode  - Do not use inodes
4328 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4329 
4330 
4331 
4332    Example usage:
4333 
4334    Consider the following 8x8 matrix with 34 non-zero values, that is
4335    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4336    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4337    as follows
4338 
4339 .vb
4340             1  2  0  |  0  3  0  |  0  4
4341     Proc0   0  5  6  |  7  0  0  |  8  0
4342             9  0 10  | 11  0  0  | 12  0
4343     -------------------------------------
4344            13  0 14  | 15 16 17  |  0  0
4345     Proc1   0 18  0  | 19 20 21  |  0  0
4346             0  0  0  | 22 23  0  | 24  0
4347     -------------------------------------
4348     Proc2  25 26 27  |  0  0 28  | 29  0
4349            30  0  0  | 31 32 33  |  0 34
4350 .ve
4351 
4352    This can be represented as a collection of submatrices as
4353 
4354 .vb
4355       A B C
4356       D E F
4357       G H I
4358 .ve
4359 
4360    Where the submatrices A,B,C are owned by proc0, D,E,F are
4361    owned by proc1, G,H,I are owned by proc2.
4362 
4363    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4364    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4365    The 'M','N' parameters are 8,8, and have the same values on all procs.
4366 
4367    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4368    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4369    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4370    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4371    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4372    matrix, and [DF] as another SeqAIJ matrix.
4373 
4374    When d_nz, o_nz parameters are specified, d_nz storage elements are
4375    allocated for every row of the local diagonal submatrix, and o_nz
4376    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4377    One way to choose d_nz and o_nz is to use the max nonzeros per local
4378    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4379    In this case, the values of d_nz,o_nz are
4380 .vb
4381      proc0 : d_nz = 2, o_nz = 2
4382      proc1 : d_nz = 3, o_nz = 2
4383      proc2 : d_nz = 1, o_nz = 4
4384 .ve
4385    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4386    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4387    for proc2. i.e we are using 12+15+10=37 storage locations to store
4388    34 values.
4389 
4390    When d_nnz, o_nnz parameters are specified, the storage is specified
4391    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4392    In the above case the values for d_nnz,o_nnz are
4393 .vb
4394      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4395      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4396      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4397 .ve
4398    Here the space allocated is sum of all the above values i.e 34, and
4399    hence pre-allocation is perfect.
4400 
4401    Level: intermediate
4402 
4403 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4404           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4405 @*/
4406 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4407 {
4408   PetscErrorCode ierr;
4409   PetscMPIInt    size;
4410 
4411   PetscFunctionBegin;
4412   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4413   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4414   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4415   if (size > 1) {
4416     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4417     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4418   } else {
4419     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4420     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4421   }
4422   PetscFunctionReturn(0);
4423 }
4424 
4425 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4426 {
4427   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4428   PetscBool      flg;
4429   PetscErrorCode ierr;
4430 
4431   PetscFunctionBegin;
4432   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4433   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4434   if (Ad)     *Ad     = a->A;
4435   if (Ao)     *Ao     = a->B;
4436   if (colmap) *colmap = a->garray;
4437   PetscFunctionReturn(0);
4438 }
4439 
4440 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4441 {
4442   PetscErrorCode ierr;
4443   PetscInt       m,N,i,rstart,nnz,Ii;
4444   PetscInt       *indx;
4445   PetscScalar    *values;
4446 
4447   PetscFunctionBegin;
4448   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4449   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4450     PetscInt       *dnz,*onz,sum,bs,cbs;
4451 
4452     if (n == PETSC_DECIDE) {
4453       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4454     }
4455     /* Check sum(n) = N */
4456     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4457     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4458 
4459     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4460     rstart -= m;
4461 
4462     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4463     for (i=0; i<m; i++) {
4464       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4465       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4466       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4467     }
4468 
4469     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4470     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4471     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4472     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4473     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4474     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4475     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4476     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4477   }
4478 
4479   /* numeric phase */
4480   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4481   for (i=0; i<m; i++) {
4482     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4483     Ii   = i + rstart;
4484     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4485     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4486   }
4487   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4488   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4489   PetscFunctionReturn(0);
4490 }
4491 
4492 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4493 {
4494   PetscErrorCode    ierr;
4495   PetscMPIInt       rank;
4496   PetscInt          m,N,i,rstart,nnz;
4497   size_t            len;
4498   const PetscInt    *indx;
4499   PetscViewer       out;
4500   char              *name;
4501   Mat               B;
4502   const PetscScalar *values;
4503 
4504   PetscFunctionBegin;
4505   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4506   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4507   /* Should this be the type of the diagonal block of A? */
4508   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4509   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4510   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4511   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4512   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4513   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4514   for (i=0; i<m; i++) {
4515     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4516     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4517     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4518   }
4519   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4520   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4521 
4522   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4523   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4524   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4525   sprintf(name,"%s.%d",outfile,rank);
4526   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4527   ierr = PetscFree(name);CHKERRQ(ierr);
4528   ierr = MatView(B,out);CHKERRQ(ierr);
4529   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4530   ierr = MatDestroy(&B);CHKERRQ(ierr);
4531   PetscFunctionReturn(0);
4532 }
4533 
4534 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4535 {
4536   PetscErrorCode      ierr;
4537   Mat_Merge_SeqsToMPI *merge;
4538   PetscContainer      container;
4539 
4540   PetscFunctionBegin;
4541   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4542   if (container) {
4543     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4544     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4545     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4546     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4547     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4548     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4549     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4550     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4551     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4552     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4553     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4554     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4555     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4556     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4557     ierr = PetscFree(merge);CHKERRQ(ierr);
4558     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4559   }
4560   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4561   PetscFunctionReturn(0);
4562 }
4563 
4564 #include <../src/mat/utils/freespace.h>
4565 #include <petscbt.h>
4566 
4567 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4568 {
4569   PetscErrorCode      ierr;
4570   MPI_Comm            comm;
4571   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4572   PetscMPIInt         size,rank,taga,*len_s;
4573   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4574   PetscInt            proc,m;
4575   PetscInt            **buf_ri,**buf_rj;
4576   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4577   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4578   MPI_Request         *s_waits,*r_waits;
4579   MPI_Status          *status;
4580   MatScalar           *aa=a->a;
4581   MatScalar           **abuf_r,*ba_i;
4582   Mat_Merge_SeqsToMPI *merge;
4583   PetscContainer      container;
4584 
4585   PetscFunctionBegin;
4586   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4587   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4588 
4589   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4590   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4591 
4592   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4593   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4594 
4595   bi     = merge->bi;
4596   bj     = merge->bj;
4597   buf_ri = merge->buf_ri;
4598   buf_rj = merge->buf_rj;
4599 
4600   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4601   owners = merge->rowmap->range;
4602   len_s  = merge->len_s;
4603 
4604   /* send and recv matrix values */
4605   /*-----------------------------*/
4606   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4607   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4608 
4609   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4610   for (proc=0,k=0; proc<size; proc++) {
4611     if (!len_s[proc]) continue;
4612     i    = owners[proc];
4613     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4614     k++;
4615   }
4616 
4617   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4618   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4619   ierr = PetscFree(status);CHKERRQ(ierr);
4620 
4621   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4622   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4623 
4624   /* insert mat values of mpimat */
4625   /*----------------------------*/
4626   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4627   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4628 
4629   for (k=0; k<merge->nrecv; k++) {
4630     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4631     nrows       = *(buf_ri_k[k]);
4632     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4633     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4634   }
4635 
4636   /* set values of ba */
4637   m = merge->rowmap->n;
4638   for (i=0; i<m; i++) {
4639     arow = owners[rank] + i;
4640     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4641     bnzi = bi[i+1] - bi[i];
4642     ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);
4643 
4644     /* add local non-zero vals of this proc's seqmat into ba */
4645     anzi   = ai[arow+1] - ai[arow];
4646     aj     = a->j + ai[arow];
4647     aa     = a->a + ai[arow];
4648     nextaj = 0;
4649     for (j=0; nextaj<anzi; j++) {
4650       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4651         ba_i[j] += aa[nextaj++];
4652       }
4653     }
4654 
4655     /* add received vals into ba */
4656     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4657       /* i-th row */
4658       if (i == *nextrow[k]) {
4659         anzi   = *(nextai[k]+1) - *nextai[k];
4660         aj     = buf_rj[k] + *(nextai[k]);
4661         aa     = abuf_r[k] + *(nextai[k]);
4662         nextaj = 0;
4663         for (j=0; nextaj<anzi; j++) {
4664           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4665             ba_i[j] += aa[nextaj++];
4666           }
4667         }
4668         nextrow[k]++; nextai[k]++;
4669       }
4670     }
4671     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4672   }
4673   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4674   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4675 
4676   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4677   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4678   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4679   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4680   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4681   PetscFunctionReturn(0);
4682 }
4683 
4684 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4685 {
4686   PetscErrorCode      ierr;
4687   Mat                 B_mpi;
4688   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4689   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4690   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4691   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4692   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4693   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4694   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4695   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4696   MPI_Status          *status;
4697   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4698   PetscBT             lnkbt;
4699   Mat_Merge_SeqsToMPI *merge;
4700   PetscContainer      container;
4701 
4702   PetscFunctionBegin;
4703   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4704 
4705   /* make sure it is a PETSc comm */
4706   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4707   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4708   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4709 
4710   ierr = PetscNew(&merge);CHKERRQ(ierr);
4711   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4712 
4713   /* determine row ownership */
4714   /*---------------------------------------------------------*/
4715   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4716   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4717   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4718   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4719   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4720   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4721   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4722 
4723   m      = merge->rowmap->n;
4724   owners = merge->rowmap->range;
4725 
4726   /* determine the number of messages to send, their lengths */
4727   /*---------------------------------------------------------*/
4728   len_s = merge->len_s;
4729 
4730   len          = 0; /* length of buf_si[] */
4731   merge->nsend = 0;
4732   for (proc=0; proc<size; proc++) {
4733     len_si[proc] = 0;
4734     if (proc == rank) {
4735       len_s[proc] = 0;
4736     } else {
4737       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4738       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4739     }
4740     if (len_s[proc]) {
4741       merge->nsend++;
4742       nrows = 0;
4743       for (i=owners[proc]; i<owners[proc+1]; i++) {
4744         if (ai[i+1] > ai[i]) nrows++;
4745       }
4746       len_si[proc] = 2*(nrows+1);
4747       len         += len_si[proc];
4748     }
4749   }
4750 
4751   /* determine the number and length of messages to receive for ij-structure */
4752   /*-------------------------------------------------------------------------*/
4753   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4754   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4755 
4756   /* post the Irecv of j-structure */
4757   /*-------------------------------*/
4758   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4759   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4760 
4761   /* post the Isend of j-structure */
4762   /*--------------------------------*/
4763   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4764 
4765   for (proc=0, k=0; proc<size; proc++) {
4766     if (!len_s[proc]) continue;
4767     i    = owners[proc];
4768     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4769     k++;
4770   }
4771 
4772   /* receives and sends of j-structure are complete */
4773   /*------------------------------------------------*/
4774   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4775   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4776 
4777   /* send and recv i-structure */
4778   /*---------------------------*/
4779   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4780   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4781 
4782   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4783   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4784   for (proc=0,k=0; proc<size; proc++) {
4785     if (!len_s[proc]) continue;
4786     /* form outgoing message for i-structure:
4787          buf_si[0]:                 nrows to be sent
4788                [1:nrows]:           row index (global)
4789                [nrows+1:2*nrows+1]: i-structure index
4790     */
4791     /*-------------------------------------------*/
4792     nrows       = len_si[proc]/2 - 1;
4793     buf_si_i    = buf_si + nrows+1;
4794     buf_si[0]   = nrows;
4795     buf_si_i[0] = 0;
4796     nrows       = 0;
4797     for (i=owners[proc]; i<owners[proc+1]; i++) {
4798       anzi = ai[i+1] - ai[i];
4799       if (anzi) {
4800         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4801         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4802         nrows++;
4803       }
4804     }
4805     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4806     k++;
4807     buf_si += len_si[proc];
4808   }
4809 
4810   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4811   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4812 
4813   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4814   for (i=0; i<merge->nrecv; i++) {
4815     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4816   }
4817 
4818   ierr = PetscFree(len_si);CHKERRQ(ierr);
4819   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4820   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4821   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4822   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4823   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4824   ierr = PetscFree(status);CHKERRQ(ierr);
4825 
4826   /* compute a local seq matrix in each processor */
4827   /*----------------------------------------------*/
4828   /* allocate bi array and free space for accumulating nonzero column info */
4829   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4830   bi[0] = 0;
4831 
4832   /* create and initialize a linked list */
4833   nlnk = N+1;
4834   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4835 
4836   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4837   len  = ai[owners[rank+1]] - ai[owners[rank]];
4838   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4839 
4840   current_space = free_space;
4841 
4842   /* determine symbolic info for each local row */
4843   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4844 
4845   for (k=0; k<merge->nrecv; k++) {
4846     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4847     nrows       = *buf_ri_k[k];
4848     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4849     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4850   }
4851 
4852   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4853   len  = 0;
4854   for (i=0; i<m; i++) {
4855     bnzi = 0;
4856     /* add local non-zero cols of this proc's seqmat into lnk */
4857     arow  = owners[rank] + i;
4858     anzi  = ai[arow+1] - ai[arow];
4859     aj    = a->j + ai[arow];
4860     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4861     bnzi += nlnk;
4862     /* add received col data into lnk */
4863     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4864       if (i == *nextrow[k]) { /* i-th row */
4865         anzi  = *(nextai[k]+1) - *nextai[k];
4866         aj    = buf_rj[k] + *nextai[k];
4867         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4868         bnzi += nlnk;
4869         nextrow[k]++; nextai[k]++;
4870       }
4871     }
4872     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4873 
4874     /* if free space is not available, make more free space */
4875     if (current_space->local_remaining<bnzi) {
4876       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4877       nspacedouble++;
4878     }
4879     /* copy data into free space, then initialize lnk */
4880     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4881     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4882 
4883     current_space->array           += bnzi;
4884     current_space->local_used      += bnzi;
4885     current_space->local_remaining -= bnzi;
4886 
4887     bi[i+1] = bi[i] + bnzi;
4888   }
4889 
4890   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4891 
4892   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4893   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4894   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4895 
4896   /* create symbolic parallel matrix B_mpi */
4897   /*---------------------------------------*/
4898   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4899   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4900   if (n==PETSC_DECIDE) {
4901     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4902   } else {
4903     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4904   }
4905   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4906   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4907   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4908   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4909   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4910 
4911   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4912   B_mpi->assembled    = PETSC_FALSE;
4913   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4914   merge->bi           = bi;
4915   merge->bj           = bj;
4916   merge->buf_ri       = buf_ri;
4917   merge->buf_rj       = buf_rj;
4918   merge->coi          = NULL;
4919   merge->coj          = NULL;
4920   merge->owners_co    = NULL;
4921 
4922   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4923 
4924   /* attach the supporting struct to B_mpi for reuse */
4925   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4926   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4927   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4928   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4929   *mpimat = B_mpi;
4930 
4931   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4932   PetscFunctionReturn(0);
4933 }
4934 
4935 /*@C
4936       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4937                  matrices from each processor
4938 
4939     Collective
4940 
4941    Input Parameters:
4942 +    comm - the communicator the parallel matrix will live on
4943 .    seqmat - the input sequential matrices
4944 .    m - number of local rows (or PETSC_DECIDE)
4945 .    n - number of local columns (or PETSC_DECIDE)
4946 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4947 
4948    Output Parameter:
4949 .    mpimat - the parallel matrix generated
4950 
4951     Level: advanced
4952 
4953    Notes:
4954      The dimensions of the sequential matrix in each processor MUST be the same.
4955      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4956      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4957 @*/
4958 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4959 {
4960   PetscErrorCode ierr;
4961   PetscMPIInt    size;
4962 
4963   PetscFunctionBegin;
4964   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4965   if (size == 1) {
4966     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4967     if (scall == MAT_INITIAL_MATRIX) {
4968       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4969     } else {
4970       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4971     }
4972     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4973     PetscFunctionReturn(0);
4974   }
4975   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4976   if (scall == MAT_INITIAL_MATRIX) {
4977     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4978   }
4979   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4980   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4981   PetscFunctionReturn(0);
4982 }
4983 
4984 /*@
4985      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4986           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4987           with MatGetSize()
4988 
4989     Not Collective
4990 
4991    Input Parameters:
4992 +    A - the matrix
4993 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4994 
4995    Output Parameter:
4996 .    A_loc - the local sequential matrix generated
4997 
4998     Level: developer
4999 
5000 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5001 
5002 @*/
5003 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5004 {
5005   PetscErrorCode ierr;
5006   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5007   Mat_SeqAIJ     *mat,*a,*b;
5008   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5009   MatScalar      *aa,*ba,*cam;
5010   PetscScalar    *ca;
5011   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5012   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5013   PetscBool      match;
5014   MPI_Comm       comm;
5015   PetscMPIInt    size;
5016 
5017   PetscFunctionBegin;
5018   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5019   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5020   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5021   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5022   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
5023 
5024   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5025   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5026   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5027   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5028   aa = a->a; ba = b->a;
5029   if (scall == MAT_INITIAL_MATRIX) {
5030     if (size == 1) {
5031       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
5032       PetscFunctionReturn(0);
5033     }
5034 
5035     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5036     ci[0] = 0;
5037     for (i=0; i<am; i++) {
5038       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5039     }
5040     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5041     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5042     k    = 0;
5043     for (i=0; i<am; i++) {
5044       ncols_o = bi[i+1] - bi[i];
5045       ncols_d = ai[i+1] - ai[i];
5046       /* off-diagonal portion of A */
5047       for (jo=0; jo<ncols_o; jo++) {
5048         col = cmap[*bj];
5049         if (col >= cstart) break;
5050         cj[k]   = col; bj++;
5051         ca[k++] = *ba++;
5052       }
5053       /* diagonal portion of A */
5054       for (j=0; j<ncols_d; j++) {
5055         cj[k]   = cstart + *aj++;
5056         ca[k++] = *aa++;
5057       }
5058       /* off-diagonal portion of A */
5059       for (j=jo; j<ncols_o; j++) {
5060         cj[k]   = cmap[*bj++];
5061         ca[k++] = *ba++;
5062       }
5063     }
5064     /* put together the new matrix */
5065     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5066     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5067     /* Since these are PETSc arrays, change flags to free them as necessary. */
5068     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5069     mat->free_a  = PETSC_TRUE;
5070     mat->free_ij = PETSC_TRUE;
5071     mat->nonew   = 0;
5072   } else if (scall == MAT_REUSE_MATRIX) {
5073     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5074     ci = mat->i; cj = mat->j; cam = mat->a;
5075     for (i=0; i<am; i++) {
5076       /* off-diagonal portion of A */
5077       ncols_o = bi[i+1] - bi[i];
5078       for (jo=0; jo<ncols_o; jo++) {
5079         col = cmap[*bj];
5080         if (col >= cstart) break;
5081         *cam++ = *ba++; bj++;
5082       }
5083       /* diagonal portion of A */
5084       ncols_d = ai[i+1] - ai[i];
5085       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5086       /* off-diagonal portion of A */
5087       for (j=jo; j<ncols_o; j++) {
5088         *cam++ = *ba++; bj++;
5089       }
5090     }
5091   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5092   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5093   PetscFunctionReturn(0);
5094 }
5095 
5096 /*@C
5097      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5098 
5099     Not Collective
5100 
5101    Input Parameters:
5102 +    A - the matrix
5103 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5104 -    row, col - index sets of rows and columns to extract (or NULL)
5105 
5106    Output Parameter:
5107 .    A_loc - the local sequential matrix generated
5108 
5109     Level: developer
5110 
5111 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5112 
5113 @*/
5114 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5115 {
5116   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5117   PetscErrorCode ierr;
5118   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5119   IS             isrowa,iscola;
5120   Mat            *aloc;
5121   PetscBool      match;
5122 
5123   PetscFunctionBegin;
5124   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5125   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5126   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5127   if (!row) {
5128     start = A->rmap->rstart; end = A->rmap->rend;
5129     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5130   } else {
5131     isrowa = *row;
5132   }
5133   if (!col) {
5134     start = A->cmap->rstart;
5135     cmap  = a->garray;
5136     nzA   = a->A->cmap->n;
5137     nzB   = a->B->cmap->n;
5138     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5139     ncols = 0;
5140     for (i=0; i<nzB; i++) {
5141       if (cmap[i] < start) idx[ncols++] = cmap[i];
5142       else break;
5143     }
5144     imark = i;
5145     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5146     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5147     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5148   } else {
5149     iscola = *col;
5150   }
5151   if (scall != MAT_INITIAL_MATRIX) {
5152     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5153     aloc[0] = *A_loc;
5154   }
5155   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5156   if (!col) { /* attach global id of condensed columns */
5157     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5158   }
5159   *A_loc = aloc[0];
5160   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5161   if (!row) {
5162     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5163   }
5164   if (!col) {
5165     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5166   }
5167   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5168   PetscFunctionReturn(0);
5169 }
5170 
5171 /*@C
5172     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5173 
5174     Collective on Mat
5175 
5176    Input Parameters:
5177 +    A,B - the matrices in mpiaij format
5178 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5179 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5180 
5181    Output Parameter:
5182 +    rowb, colb - index sets of rows and columns of B to extract
5183 -    B_seq - the sequential matrix generated
5184 
5185     Level: developer
5186 
5187 @*/
5188 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5189 {
5190   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5191   PetscErrorCode ierr;
5192   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5193   IS             isrowb,iscolb;
5194   Mat            *bseq=NULL;
5195 
5196   PetscFunctionBegin;
5197   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5198     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5199   }
5200   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5201 
5202   if (scall == MAT_INITIAL_MATRIX) {
5203     start = A->cmap->rstart;
5204     cmap  = a->garray;
5205     nzA   = a->A->cmap->n;
5206     nzB   = a->B->cmap->n;
5207     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5208     ncols = 0;
5209     for (i=0; i<nzB; i++) {  /* row < local row index */
5210       if (cmap[i] < start) idx[ncols++] = cmap[i];
5211       else break;
5212     }
5213     imark = i;
5214     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5215     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5216     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5217     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5218   } else {
5219     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5220     isrowb  = *rowb; iscolb = *colb;
5221     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5222     bseq[0] = *B_seq;
5223   }
5224   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5225   *B_seq = bseq[0];
5226   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5227   if (!rowb) {
5228     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5229   } else {
5230     *rowb = isrowb;
5231   }
5232   if (!colb) {
5233     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5234   } else {
5235     *colb = iscolb;
5236   }
5237   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5238   PetscFunctionReturn(0);
5239 }
5240 
5241 /*
5242     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5243     of the OFF-DIAGONAL portion of local A
5244 
5245     Collective on Mat
5246 
5247    Input Parameters:
5248 +    A,B - the matrices in mpiaij format
5249 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5250 
5251    Output Parameter:
5252 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5253 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5254 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5255 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5256 
5257     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5258      for this matrix. This is not desirable..
5259 
5260     Level: developer
5261 
5262 */
5263 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5264 {
5265   PetscErrorCode         ierr;
5266   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5267   Mat_SeqAIJ             *b_oth;
5268   VecScatter             ctx;
5269   MPI_Comm               comm;
5270   const PetscMPIInt      *rprocs,*sprocs;
5271   const PetscInt         *srow,*rstarts,*sstarts;
5272   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5273   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len;
5274   PetscScalar              *b_otha,*bufa,*bufA,*vals;
5275   MPI_Request            *rwaits = NULL,*swaits = NULL;
5276   MPI_Status             rstatus;
5277   PetscMPIInt            jj,size,tag,rank,nsends_mpi,nrecvs_mpi;
5278 
5279   PetscFunctionBegin;
5280   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5281   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5282 
5283   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5284     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5285   }
5286   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5287   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5288 
5289   if (size == 1) {
5290     startsj_s = NULL;
5291     bufa_ptr  = NULL;
5292     *B_oth    = NULL;
5293     PetscFunctionReturn(0);
5294   }
5295 
5296   ctx = a->Mvctx;
5297   tag = ((PetscObject)ctx)->tag;
5298 
5299   if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use");
5300   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5301   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5302   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5303   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5304   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5305   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5306 
5307   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5308   if (scall == MAT_INITIAL_MATRIX) {
5309     /* i-array */
5310     /*---------*/
5311     /*  post receives */
5312     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5313     for (i=0; i<nrecvs; i++) {
5314       rowlen = rvalues + rstarts[i]*rbs;
5315       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5316       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5317     }
5318 
5319     /* pack the outgoing message */
5320     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5321 
5322     sstartsj[0] = 0;
5323     rstartsj[0] = 0;
5324     len         = 0; /* total length of j or a array to be sent */
5325     if (nsends) {
5326       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5327       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5328     }
5329     for (i=0; i<nsends; i++) {
5330       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5331       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5332       for (j=0; j<nrows; j++) {
5333         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5334         for (l=0; l<sbs; l++) {
5335           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5336 
5337           rowlen[j*sbs+l] = ncols;
5338 
5339           len += ncols;
5340           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5341         }
5342         k++;
5343       }
5344       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5345 
5346       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5347     }
5348     /* recvs and sends of i-array are completed */
5349     i = nrecvs;
5350     while (i--) {
5351       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5352     }
5353     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5354     ierr = PetscFree(svalues);CHKERRQ(ierr);
5355 
5356     /* allocate buffers for sending j and a arrays */
5357     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5358     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5359 
5360     /* create i-array of B_oth */
5361     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5362 
5363     b_othi[0] = 0;
5364     len       = 0; /* total length of j or a array to be received */
5365     k         = 0;
5366     for (i=0; i<nrecvs; i++) {
5367       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5368       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5369       for (j=0; j<nrows; j++) {
5370         b_othi[k+1] = b_othi[k] + rowlen[j];
5371         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5372         k++;
5373       }
5374       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5375     }
5376     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5377 
5378     /* allocate space for j and a arrrays of B_oth */
5379     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5380     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5381 
5382     /* j-array */
5383     /*---------*/
5384     /*  post receives of j-array */
5385     for (i=0; i<nrecvs; i++) {
5386       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5387       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5388     }
5389 
5390     /* pack the outgoing message j-array */
5391     if (nsends) k = sstarts[0];
5392     for (i=0; i<nsends; i++) {
5393       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5394       bufJ  = bufj+sstartsj[i];
5395       for (j=0; j<nrows; j++) {
5396         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5397         for (ll=0; ll<sbs; ll++) {
5398           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5399           for (l=0; l<ncols; l++) {
5400             *bufJ++ = cols[l];
5401           }
5402           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5403         }
5404       }
5405       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5406     }
5407 
5408     /* recvs and sends of j-array are completed */
5409     i = nrecvs;
5410     while (i--) {
5411       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5412     }
5413     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5414   } else if (scall == MAT_REUSE_MATRIX) {
5415     sstartsj = *startsj_s;
5416     rstartsj = *startsj_r;
5417     bufa     = *bufa_ptr;
5418     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5419     b_otha   = b_oth->a;
5420   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5421 
5422   /* a-array */
5423   /*---------*/
5424   /*  post receives of a-array */
5425   for (i=0; i<nrecvs; i++) {
5426     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5427     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5428   }
5429 
5430   /* pack the outgoing message a-array */
5431   if (nsends) k = sstarts[0];
5432   for (i=0; i<nsends; i++) {
5433     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5434     bufA  = bufa+sstartsj[i];
5435     for (j=0; j<nrows; j++) {
5436       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5437       for (ll=0; ll<sbs; ll++) {
5438         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5439         for (l=0; l<ncols; l++) {
5440           *bufA++ = vals[l];
5441         }
5442         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5443       }
5444     }
5445     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5446   }
5447   /* recvs and sends of a-array are completed */
5448   i = nrecvs;
5449   while (i--) {
5450     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5451   }
5452   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5453   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5454 
5455   if (scall == MAT_INITIAL_MATRIX) {
5456     /* put together the new matrix */
5457     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5458 
5459     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5460     /* Since these are PETSc arrays, change flags to free them as necessary. */
5461     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5462     b_oth->free_a  = PETSC_TRUE;
5463     b_oth->free_ij = PETSC_TRUE;
5464     b_oth->nonew   = 0;
5465 
5466     ierr = PetscFree(bufj);CHKERRQ(ierr);
5467     if (!startsj_s || !bufa_ptr) {
5468       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5469       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5470     } else {
5471       *startsj_s = sstartsj;
5472       *startsj_r = rstartsj;
5473       *bufa_ptr  = bufa;
5474     }
5475   }
5476 
5477   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5478   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5479   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5480   PetscFunctionReturn(0);
5481 }
5482 
5483 /*@C
5484   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5485 
5486   Not Collective
5487 
5488   Input Parameters:
5489 . A - The matrix in mpiaij format
5490 
5491   Output Parameter:
5492 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5493 . colmap - A map from global column index to local index into lvec
5494 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5495 
5496   Level: developer
5497 
5498 @*/
5499 #if defined(PETSC_USE_CTABLE)
5500 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5501 #else
5502 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5503 #endif
5504 {
5505   Mat_MPIAIJ *a;
5506 
5507   PetscFunctionBegin;
5508   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5509   PetscValidPointer(lvec, 2);
5510   PetscValidPointer(colmap, 3);
5511   PetscValidPointer(multScatter, 4);
5512   a = (Mat_MPIAIJ*) A->data;
5513   if (lvec) *lvec = a->lvec;
5514   if (colmap) *colmap = a->colmap;
5515   if (multScatter) *multScatter = a->Mvctx;
5516   PetscFunctionReturn(0);
5517 }
5518 
5519 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5520 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5521 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5522 #if defined(PETSC_HAVE_MKL_SPARSE)
5523 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5524 #endif
5525 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5526 #if defined(PETSC_HAVE_ELEMENTAL)
5527 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5528 #endif
5529 #if defined(PETSC_HAVE_HYPRE)
5530 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5531 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5532 #endif
5533 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5534 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5535 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5536 
5537 /*
5538     Computes (B'*A')' since computing B*A directly is untenable
5539 
5540                n                       p                          p
5541         (              )       (              )         (                  )
5542       m (      A       )  *  n (       B      )   =   m (         C        )
5543         (              )       (              )         (                  )
5544 
5545 */
5546 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5547 {
5548   PetscErrorCode ierr;
5549   Mat            At,Bt,Ct;
5550 
5551   PetscFunctionBegin;
5552   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5553   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5554   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5555   ierr = MatDestroy(&At);CHKERRQ(ierr);
5556   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5557   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5558   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5559   PetscFunctionReturn(0);
5560 }
5561 
5562 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5563 {
5564   PetscErrorCode ierr;
5565   PetscInt       m=A->rmap->n,n=B->cmap->n;
5566   Mat            Cmat;
5567 
5568   PetscFunctionBegin;
5569   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5570   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5571   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5572   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5573   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5574   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5575   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5576   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5577 
5578   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5579 
5580   *C = Cmat;
5581   PetscFunctionReturn(0);
5582 }
5583 
5584 /* ----------------------------------------------------------------*/
5585 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5586 {
5587   PetscErrorCode ierr;
5588 
5589   PetscFunctionBegin;
5590   if (scall == MAT_INITIAL_MATRIX) {
5591     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5592     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5593     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5594   }
5595   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5596   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5597   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5598   PetscFunctionReturn(0);
5599 }
5600 
5601 /*MC
5602    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5603 
5604    Options Database Keys:
5605 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5606 
5607   Level: beginner
5608 
5609 .seealso: MatCreateAIJ()
5610 M*/
5611 
5612 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5613 {
5614   Mat_MPIAIJ     *b;
5615   PetscErrorCode ierr;
5616   PetscMPIInt    size;
5617 
5618   PetscFunctionBegin;
5619   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5620 
5621   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5622   B->data       = (void*)b;
5623   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5624   B->assembled  = PETSC_FALSE;
5625   B->insertmode = NOT_SET_VALUES;
5626   b->size       = size;
5627 
5628   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5629 
5630   /* build cache for off array entries formed */
5631   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5632 
5633   b->donotstash  = PETSC_FALSE;
5634   b->colmap      = 0;
5635   b->garray      = 0;
5636   b->roworiented = PETSC_TRUE;
5637 
5638   /* stuff used for matrix vector multiply */
5639   b->lvec  = NULL;
5640   b->Mvctx = NULL;
5641 
5642   /* stuff for MatGetRow() */
5643   b->rowindices   = 0;
5644   b->rowvalues    = 0;
5645   b->getrowactive = PETSC_FALSE;
5646 
5647   /* flexible pointer used in CUSP/CUSPARSE classes */
5648   b->spptr = NULL;
5649 
5650   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5651   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5652   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5653   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5654   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5655   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5656   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5657   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5658   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5659   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
5660 #if defined(PETSC_HAVE_MKL_SPARSE)
5661   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5662 #endif
5663   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5664   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5665 #if defined(PETSC_HAVE_ELEMENTAL)
5666   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5667 #endif
5668 #if defined(PETSC_HAVE_HYPRE)
5669   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5670 #endif
5671   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
5672   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5673   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5674   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5675   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5676 #if defined(PETSC_HAVE_HYPRE)
5677   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5678 #endif
5679   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
5680   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5681   PetscFunctionReturn(0);
5682 }
5683 
5684 /*@C
5685      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5686          and "off-diagonal" part of the matrix in CSR format.
5687 
5688    Collective
5689 
5690    Input Parameters:
5691 +  comm - MPI communicator
5692 .  m - number of local rows (Cannot be PETSC_DECIDE)
5693 .  n - This value should be the same as the local size used in creating the
5694        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5695        calculated if N is given) For square matrices n is almost always m.
5696 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5697 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5698 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
5699 .   j - column indices
5700 .   a - matrix values
5701 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
5702 .   oj - column indices
5703 -   oa - matrix values
5704 
5705    Output Parameter:
5706 .   mat - the matrix
5707 
5708    Level: advanced
5709 
5710    Notes:
5711        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5712        must free the arrays once the matrix has been destroyed and not before.
5713 
5714        The i and j indices are 0 based
5715 
5716        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5717 
5718        This sets local rows and cannot be used to set off-processor values.
5719 
5720        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5721        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5722        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5723        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5724        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5725        communication if it is known that only local entries will be set.
5726 
5727 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5728           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5729 @*/
5730 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5731 {
5732   PetscErrorCode ierr;
5733   Mat_MPIAIJ     *maij;
5734 
5735   PetscFunctionBegin;
5736   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5737   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5738   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5739   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5740   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5741   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5742   maij = (Mat_MPIAIJ*) (*mat)->data;
5743 
5744   (*mat)->preallocated = PETSC_TRUE;
5745 
5746   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5747   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5748 
5749   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5750   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5751 
5752   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5753   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5754   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5755   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5756 
5757   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5758   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5759   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5760   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5761   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5762   PetscFunctionReturn(0);
5763 }
5764 
5765 /*
5766     Special version for direct calls from Fortran
5767 */
5768 #include <petsc/private/fortranimpl.h>
5769 
5770 /* Change these macros so can be used in void function */
5771 #undef CHKERRQ
5772 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5773 #undef SETERRQ2
5774 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5775 #undef SETERRQ3
5776 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5777 #undef SETERRQ
5778 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5779 
5780 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5781 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5782 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5783 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5784 #else
5785 #endif
5786 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5787 {
5788   Mat            mat  = *mmat;
5789   PetscInt       m    = *mm, n = *mn;
5790   InsertMode     addv = *maddv;
5791   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5792   PetscScalar    value;
5793   PetscErrorCode ierr;
5794 
5795   MatCheckPreallocated(mat,1);
5796   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5797 
5798 #if defined(PETSC_USE_DEBUG)
5799   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5800 #endif
5801   {
5802     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5803     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5804     PetscBool roworiented = aij->roworiented;
5805 
5806     /* Some Variables required in the macro */
5807     Mat        A                 = aij->A;
5808     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5809     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5810     MatScalar  *aa               = a->a;
5811     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5812     Mat        B                 = aij->B;
5813     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5814     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5815     MatScalar  *ba               = b->a;
5816 
5817     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5818     PetscInt  nonew = a->nonew;
5819     MatScalar *ap1,*ap2;
5820 
5821     PetscFunctionBegin;
5822     for (i=0; i<m; i++) {
5823       if (im[i] < 0) continue;
5824 #if defined(PETSC_USE_DEBUG)
5825       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5826 #endif
5827       if (im[i] >= rstart && im[i] < rend) {
5828         row      = im[i] - rstart;
5829         lastcol1 = -1;
5830         rp1      = aj + ai[row];
5831         ap1      = aa + ai[row];
5832         rmax1    = aimax[row];
5833         nrow1    = ailen[row];
5834         low1     = 0;
5835         high1    = nrow1;
5836         lastcol2 = -1;
5837         rp2      = bj + bi[row];
5838         ap2      = ba + bi[row];
5839         rmax2    = bimax[row];
5840         nrow2    = bilen[row];
5841         low2     = 0;
5842         high2    = nrow2;
5843 
5844         for (j=0; j<n; j++) {
5845           if (roworiented) value = v[i*n+j];
5846           else value = v[i+j*m];
5847           if (in[j] >= cstart && in[j] < cend) {
5848             col = in[j] - cstart;
5849             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5850             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5851           } else if (in[j] < 0) continue;
5852 #if defined(PETSC_USE_DEBUG)
5853           /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
5854           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5855 #endif
5856           else {
5857             if (mat->was_assembled) {
5858               if (!aij->colmap) {
5859                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5860               }
5861 #if defined(PETSC_USE_CTABLE)
5862               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5863               col--;
5864 #else
5865               col = aij->colmap[in[j]] - 1;
5866 #endif
5867               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5868               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5869                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5870                 col  =  in[j];
5871                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5872                 B     = aij->B;
5873                 b     = (Mat_SeqAIJ*)B->data;
5874                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5875                 rp2   = bj + bi[row];
5876                 ap2   = ba + bi[row];
5877                 rmax2 = bimax[row];
5878                 nrow2 = bilen[row];
5879                 low2  = 0;
5880                 high2 = nrow2;
5881                 bm    = aij->B->rmap->n;
5882                 ba    = b->a;
5883               }
5884             } else col = in[j];
5885             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5886           }
5887         }
5888       } else if (!aij->donotstash) {
5889         if (roworiented) {
5890           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5891         } else {
5892           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5893         }
5894       }
5895     }
5896   }
5897   PetscFunctionReturnVoid();
5898 }
5899