xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 52c8aa5d04d6f20c57c028aad0c4c9bbea65119a)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity, as in the example below.
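
   Example (a minimal sketch of the recommended pattern; the sizes and per-row fill estimates are placeholders):
.vb
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,M,N);
     MatSetType(A,MATAIJ);
     MatSeqAIJSetPreallocation(A,5,NULL);
     MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);
.ve
   Whichever preallocation routine does not match the communicator size is ignored, so calling both is safe.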
15 
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19  Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to use inodes when
20   enough exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
25 M*/
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
110 {
111   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
112   PetscErrorCode ierr;
113   PetscInt       i,rstart,nrows,*rows;
114 
115   PetscFunctionBegin;
116   *zrows = NULL;
117   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
118   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
119   for (i=0; i<nrows; i++) rows[i] += rstart;
120   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
127 {
128   PetscErrorCode ierr;
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
130   PetscInt       i,n,*garray = aij->garray;
131   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
132   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
133   PetscReal      *work;
134 
135   PetscFunctionBegin;
136   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
137   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
138   if (type == NORM_2) {
139     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
140       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
141     }
142     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
143       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
144     }
145   } else if (type == NORM_1) {
146     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
147       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
148     }
149     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
150       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
151     }
152   } else if (type == NORM_INFINITY) {
153     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
154       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
155     }
156     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
157       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
158     }
159 
160   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
161   if (type == NORM_INFINITY) {
162     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
163   } else {
164     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
165   }
166   ierr = PetscFree(work);CHKERRQ(ierr);
167   if (type == NORM_2) {
168     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
169   }
170   PetscFunctionReturn(0);
171 }
172 
173 #undef __FUNCT__
174 #define __FUNCT__ "MatDistribute_MPIAIJ"
175 /*
176     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
177     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
178 
179     Only for square matrices
180 
181     Used by a preconditioner, hence PETSC_EXTERN
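
    A minimal calling sketch (illustrative only, not taken from the callers; gmat must be a valid
    Mat on every rank and hold the whole matrix as MATSEQAIJ on rank 0, and mlocal is this rank's
    desired number of local rows, which also serves as the local column count since only square
    matrices are handled):

        MatDistribute_MPIAIJ(comm,gmat,mlocal,MAT_INITIAL_MATRIX,&dmat);
        MatDistribute_MPIAIJ(comm,gmat,mlocal,MAT_REUSE_MATRIX,&dmat);

    With MAT_REUSE_MATRIX only the numerical values are moved over from rank 0; the nonzero
    pattern set up by the MAT_INITIAL_MATRIX call is kept.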
182 */
183 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
184 {
185   PetscMPIInt    rank,size;
186   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
187   PetscErrorCode ierr;
188   Mat            mat;
189   Mat_SeqAIJ     *gmata;
190   PetscMPIInt    tag;
191   MPI_Status     status;
192   PetscBool      aij;
193   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
194 
195   PetscFunctionBegin;
196   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
197   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
198   if (!rank) {
199     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
200     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
201   }
202   if (reuse == MAT_INITIAL_MATRIX) {
203     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
204     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
205     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
206     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
207     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
208     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
209     ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
210     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
211     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
212 
213     rowners[0] = 0;
214     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
215     rstart = rowners[rank];
216     rend   = rowners[rank+1];
217     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
218     if (!rank) {
219       gmata = (Mat_SeqAIJ*) gmat->data;
220       /* send row lengths to all processors */
221       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
222       for (i=1; i<size; i++) {
223         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
224       }
225       /* determine the number of entries before the diagonal block (ld) and off the diagonal block (olens) in each row */
226       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
227       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
228       jj   = 0;
229       for (i=0; i<m; i++) {
230         for (j=0; j<dlens[i]; j++) {
231           if (gmata->j[jj] < rstart) ld[i]++;
232           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
233           jj++;
234         }
235       }
236       /* send column indices to other processes */
237       for (i=1; i<size; i++) {
238         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
239         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
240         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
241       }
242 
243       /* send numerical values to other processes */
244       for (i=1; i<size; i++) {
245         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
246         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
247       }
248       gmataa = gmata->a;
249       gmataj = gmata->j;
250 
251     } else {
252       /* receive row lengths */
253       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
254       /* receive column indices */
255       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
256       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
257       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
258       /* determine the number of entries before the diagonal block (ld) and off the diagonal block (olens) in each row */
259       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
260       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
261       jj   = 0;
262       for (i=0; i<m; i++) {
263         for (j=0; j<dlens[i]; j++) {
264           if (gmataj[jj] < rstart) ld[i]++;
265           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
266           jj++;
267         }
268       }
269       /* receive numerical values */
270       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
271       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
272     }
273     /* set preallocation */
274     for (i=0; i<m; i++) {
275       dlens[i] -= olens[i];
276     }
277     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
278     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
279 
280     for (i=0; i<m; i++) {
281       dlens[i] += olens[i];
282     }
283     cnt = 0;
284     for (i=0; i<m; i++) {
285       row  = rstart + i;
286       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
287       cnt += dlens[i];
288     }
289     if (rank) {
290       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
291     }
292     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
293     ierr = PetscFree(rowners);CHKERRQ(ierr);
294 
295     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
296 
297     *inmat = mat;
298   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
299     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
300     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
301     mat  = *inmat;
302     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
303     if (!rank) {
304       /* send numerical values to other processes */
305       gmata  = (Mat_SeqAIJ*) gmat->data;
306       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
307       gmataa = gmata->a;
308       for (i=1; i<size; i++) {
309         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
310         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
311       }
312       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
313     } else {
314       /* receive numerical values from process 0 */
315       nz   = Ad->nz + Ao->nz;
316       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
317       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
318     }
319     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
320     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
321     ad = Ad->a;
322     ao = Ao->a;
323     if (mat->rmap->n) {
324       i  = 0;
325       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
326       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
327     }
328     for (i=1; i<mat->rmap->n; i++) {
329       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
330       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
331     }
332     i--;
333     if (mat->rmap->n) {
334       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
335     }
336     if (rank) {
337       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
338     }
339   }
340   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
341   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
342   PetscFunctionReturn(0);
343 }
344 
345 /*
346   Local utility routine that creates a mapping from the global column
347 number to the local number in the off-diagonal part of the local
348 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
349 a slightly higher hash table cost; without it, it is not scalable (each processor
350 has an order-N integer array) but is fast to access.
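
  A lookup sketch mirroring the way the colmap is used later in this file (gcol and lcol are
  illustrative names for a global column index and the corresponding local off-diagonal index):

  #if defined(PETSC_USE_CTABLE)
    PetscTableFind(aij->colmap,gcol+1,&lcol); lcol--;
  #else
    lcol = aij->colmap[gcol] - 1;
  #endif

  lcol is then the column index into the off-diagonal block aij->B, or negative if gcol does
  not appear there.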
351 */
352 #undef __FUNCT__
353 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
354 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
355 {
356   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
357   PetscErrorCode ierr;
358   PetscInt       n = aij->B->cmap->n,i;
359 
360   PetscFunctionBegin;
361   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
362 #if defined(PETSC_USE_CTABLE)
363   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
364   for (i=0; i<n; i++) {
365     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
366   }
367 #else
368   ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
369   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
370   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
371 #endif
372   PetscFunctionReturn(0);
373 }
374 
375 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
376 { \
377     if (col <= lastcol1)  low1 = 0;     \
378     else                 high1 = nrow1; \
379     lastcol1 = col;\
380     while (high1-low1 > 5) { \
381       t = (low1+high1)/2; \
382       if (rp1[t] > col) high1 = t; \
383       else              low1  = t; \
384     } \
385       for (_i=low1; _i<high1; _i++) { \
386         if (rp1[_i] > col) break; \
387         if (rp1[_i] == col) { \
388           if (addv == ADD_VALUES) ap1[_i] += value;   \
389           else                    ap1[_i] = value; \
390           goto a_noinsert; \
391         } \
392       }  \
393       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
394       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
395       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
396       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
397       N = nrow1++ - 1; a->nz++; high1++; \
398       /* shift up all the later entries in this row */ \
399       for (ii=N; ii>=_i; ii--) { \
400         rp1[ii+1] = rp1[ii]; \
401         ap1[ii+1] = ap1[ii]; \
402       } \
403       rp1[_i] = col;  \
404       ap1[_i] = value;  \
405       A->nonzerostate++;\
406       a_noinsert: ; \
407       ailen[row] = nrow1; \
408 }
409 
410 
411 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
412   { \
413     if (col <= lastcol2) low2 = 0;                        \
414     else high2 = nrow2;                                   \
415     lastcol2 = col;                                       \
416     while (high2-low2 > 5) {                              \
417       t = (low2+high2)/2;                                 \
418       if (rp2[t] > col) high2 = t;                        \
419       else             low2  = t;                         \
420     }                                                     \
421     for (_i=low2; _i<high2; _i++) {                       \
422       if (rp2[_i] > col) break;                           \
423       if (rp2[_i] == col) {                               \
424         if (addv == ADD_VALUES) ap2[_i] += value;         \
425         else                    ap2[_i] = value;          \
426         goto b_noinsert;                                  \
427       }                                                   \
428     }                                                     \
429     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
430     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
431     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
432     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
433     N = nrow2++ - 1; b->nz++; high2++;                    \
434     /* shift up all the later entries in this row */      \
435     for (ii=N; ii>=_i; ii--) {                            \
436       rp2[ii+1] = rp2[ii];                                \
437       ap2[ii+1] = ap2[ii];                                \
438     }                                                     \
439     rp2[_i] = col;                                        \
440     ap2[_i] = value;                                      \
441     B->nonzerostate++;                                    \
442     b_noinsert: ;                                         \
443     bilen[row] = nrow2;                                   \
444   }
445 
446 #undef __FUNCT__
447 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
448 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
449 {
450   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
451   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
452   PetscErrorCode ierr;
453   PetscInt       l,*garray = mat->garray,diag;
454 
455   PetscFunctionBegin;
456   /* code only works for square matrices A */
457 
458   /* find size of row to the left of the diagonal part */
459   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
460   row  = row - diag;
461   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
462     if (garray[b->j[b->i[row]+l]] > diag) break;
463   }
464   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
465 
466   /* diagonal part */
467   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
468 
469   /* right of diagonal part */
470   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
471   PetscFunctionReturn(0);
472 }
473 
474 #undef __FUNCT__
475 #define __FUNCT__ "MatSetValues_MPIAIJ"
476 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
477 {
478   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
479   PetscScalar    value;
480   PetscErrorCode ierr;
481   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
482   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
483   PetscBool      roworiented = aij->roworiented;
484 
485   /* Some Variables required in the macro */
486   Mat        A                 = aij->A;
487   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
488   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
489   MatScalar  *aa               = a->a;
490   PetscBool  ignorezeroentries = a->ignorezeroentries;
491   Mat        B                 = aij->B;
492   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
493   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
494   MatScalar  *ba               = b->a;
495 
496   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
497   PetscInt  nonew;
498   MatScalar *ap1,*ap2;
499 
500   PetscFunctionBegin;
501   if (v) PetscValidScalarPointer(v,6);
502   for (i=0; i<m; i++) {
503     if (im[i] < 0) continue;
504 #if defined(PETSC_USE_DEBUG)
505     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
506 #endif
507     if (im[i] >= rstart && im[i] < rend) {
508       row      = im[i] - rstart;
509       lastcol1 = -1;
510       rp1      = aj + ai[row];
511       ap1      = aa + ai[row];
512       rmax1    = aimax[row];
513       nrow1    = ailen[row];
514       low1     = 0;
515       high1    = nrow1;
516       lastcol2 = -1;
517       rp2      = bj + bi[row];
518       ap2      = ba + bi[row];
519       rmax2    = bimax[row];
520       nrow2    = bilen[row];
521       low2     = 0;
522       high2    = nrow2;
523 
524       for (j=0; j<n; j++) {
525         if (v) {
526           if (roworiented) value = v[i*n+j];
527           else             value = v[i+j*m];
528         } else value = 0.0;
529         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
530         if (in[j] >= cstart && in[j] < cend) {
531           col   = in[j] - cstart;
532           nonew = a->nonew;
533           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
534         } else if (in[j] < 0) continue;
535 #if defined(PETSC_USE_DEBUG)
536         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
537 #endif
538         else {
539           if (mat->was_assembled) {
540             if (!aij->colmap) {
541               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
542             }
543 #if defined(PETSC_USE_CTABLE)
544             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
545             col--;
546 #else
547             col = aij->colmap[in[j]] - 1;
548 #endif
549             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
550               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
551               col  =  in[j];
552               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
553               B     = aij->B;
554               b     = (Mat_SeqAIJ*)B->data;
555               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
556               rp2   = bj + bi[row];
557               ap2   = ba + bi[row];
558               rmax2 = bimax[row];
559               nrow2 = bilen[row];
560               low2  = 0;
561               high2 = nrow2;
562               bm    = aij->B->rmap->n;
563               ba    = b->a;
564             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
565           } else col = in[j];
566           nonew = b->nonew;
567           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
568         }
569       }
570     } else {
571       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
572       if (!aij->donotstash) {
573         mat->assembled = PETSC_FALSE;
574         if (roworiented) {
575           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
576         } else {
577           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
578         }
579       }
580     }
581   }
582   PetscFunctionReturn(0);
583 }
584 
585 #undef __FUNCT__
586 #define __FUNCT__ "MatGetValues_MPIAIJ"
587 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
588 {
589   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
590   PetscErrorCode ierr;
591   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
592   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
593 
594   PetscFunctionBegin;
595   for (i=0; i<m; i++) {
596     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
597     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
598     if (idxm[i] >= rstart && idxm[i] < rend) {
599       row = idxm[i] - rstart;
600       for (j=0; j<n; j++) {
601         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
602         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
603         if (idxn[j] >= cstart && idxn[j] < cend) {
604           col  = idxn[j] - cstart;
605           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
606         } else {
607           if (!aij->colmap) {
608             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
609           }
610 #if defined(PETSC_USE_CTABLE)
611           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
612           col--;
613 #else
614           col = aij->colmap[idxn[j]] - 1;
615 #endif
616           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
617           else {
618             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
619           }
620         }
621       }
622     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
623   }
624   PetscFunctionReturn(0);
625 }
626 
627 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
628 
629 #undef __FUNCT__
630 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
631 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
632 {
633   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
634   PetscErrorCode ierr;
635   PetscInt       nstash,reallocs;
636   InsertMode     addv;
637 
638   PetscFunctionBegin;
639   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
640 
641   /* make sure all processes are either in INSERT_VALUES or ADD_VALUES mode */
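      /* (the bitwise OR reduction leaves the mode unchanged when every process used the same mode;
         if some processes inserted and others added, both bits are set and the test below detects the mix) */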
642   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
643   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
644   mat->insertmode = addv; /* in case this processor had no cache */
645 
646   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
647   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
648   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
649   PetscFunctionReturn(0);
650 }
651 
652 #undef __FUNCT__
653 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
654 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
655 {
656   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
657   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
658   PetscErrorCode ierr;
659   PetscMPIInt    n;
660   PetscInt       i,j,rstart,ncols,flg;
661   PetscInt       *row,*col;
662   PetscBool      other_disassembled;
663   PetscScalar    *val;
664   InsertMode     addv = mat->insertmode;
665 
666   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
667 
668   PetscFunctionBegin;
669   if (!aij->donotstash && !mat->nooffprocentries) {
670     while (1) {
671       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
672       if (!flg) break;
673 
674       for (i=0; i<n; ) {
675         /* Now identify the consecutive vals belonging to the same row */
676         for (j=i,rstart=row[j]; j<n; j++) {
677           if (row[j] != rstart) break;
678         }
679         if (j < n) ncols = j-i;
680         else       ncols = n-i;
681         /* Now assemble all these values with a single function call */
682         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
683 
684         i = j;
685       }
686     }
687     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
688   }
689   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
690   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
691 
692   /* determine if any processor has disassembled, if so we must
693      also disassemble ourselves, in order that we may reassemble. */
694   /*
695      if nonzero structure of submatrix B cannot change then we know that
696      no processor disassembled thus we can skip this stuff
697   */
698   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
699     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
700     if (mat->was_assembled && !other_disassembled) {
701       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
702     }
703   }
704   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
705     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
706   }
707   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
708   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
709   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
710 
711   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
712 
713   aij->rowvalues = 0;
714 
715   /* used by MatAXPY() */
716   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
717   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
718 
719   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
720   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
721 
722   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
723   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
724     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
725     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
726   }
727   PetscFunctionReturn(0);
728 }
729 
730 #undef __FUNCT__
731 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
732 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
733 {
734   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
735   PetscErrorCode ierr;
736 
737   PetscFunctionBegin;
738   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
739   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "MatZeroRows_MPIAIJ"
745 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
746 {
747   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
748   PetscInt      *owners = A->rmap->range;
749   PetscInt       n      = A->rmap->n;
750   PetscMPIInt    size   = mat->size;
751   PetscSF        sf;
752   PetscInt      *lrows;
753   PetscSFNode   *rrows;
754   PetscInt       lastidx = -1, r, p = 0, len = 0;
755   PetscErrorCode ierr;
756 
757   PetscFunctionBegin;
758   /* Create SF where leaves are input rows and roots are owned rows */
759   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
760   for (r = 0; r < n; ++r) lrows[r] = -1;
761   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
762   for (r = 0; r < N; ++r) {
763     const PetscInt idx   = rows[r];
764     PetscBool      found = PETSC_FALSE;
765     /* Trick for efficient searching for sorted rows */
766     if (lastidx > idx) p = 0;
767     lastidx = idx;
768     for (; p < size; ++p) {
769       if (idx >= owners[p] && idx < owners[p+1]) {
770         rrows[r].rank  = p;
771         rrows[r].index = rows[r] - owners[p];
772         found = PETSC_TRUE;
773         break;
774       }
775     }
776     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
777   }
778   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
779   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
780   /* Collect flags for rows to be zeroed */
781   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
782   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
783   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
784   /* Compress and put in row numbers */
785   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
786   /* fix right hand side if needed */
787   if (x && b) {
788     const PetscScalar *xx;
789     PetscScalar       *bb;
790 
791     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
792     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
793     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
794     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
795     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
796   }
797   /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
798   ierr = MatZeroRows(mat->B, len, lrows, 0.0, 0,0);CHKERRQ(ierr);
799   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
800     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
801   } else if (diag != 0.0) {
802     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
803     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
804     for (r = 0; r < len; ++r) {
805       const PetscInt row = lrows[r] + A->rmap->rstart;
806       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
807     }
808     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
809     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
810   } else {
811     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
812   }
813   ierr = PetscFree(lrows);CHKERRQ(ierr);
814 
815   /* only change matrix nonzero state if pattern was allowed to be changed */
816   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
817     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
818     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
819   }
820   PetscFunctionReturn(0);
821 }
822 
823 #undef __FUNCT__
824 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
825 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
826 {
827   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
828   PetscErrorCode    ierr;
829   PetscMPIInt       size = l->size,n = A->rmap->n,lastidx = -1;
830   PetscInt          i,j,r,m,p = 0,len = 0;
831   PetscInt          *lrows,*owners = A->rmap->range;
832   PetscSFNode       *rrows;
833   PetscSF           sf;
834   const PetscScalar *xx;
835   PetscScalar       *bb,*mask;
836   Vec               xmask,lmask;
837   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
838   const PetscInt    *aj, *ii,*ridx;
839   PetscScalar       *aa;
840 #if defined(PETSC_DEBUG)
841   PetscBool found = PETSC_FALSE;
842 #endif
843 
844   PetscFunctionBegin;
845   /* Create SF where leaves are input rows and roots are owned rows */
846   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
847   for (r = 0; r < n; ++r) lrows[r] = -1;
848   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
849   for (r = 0; r < N; ++r) {
850     const PetscInt idx   = rows[r];
851     PetscBool      found = PETSC_FALSE;
852     /* Trick for efficient searching for sorted rows */
853     if (lastidx > idx) p = 0;
854     lastidx = idx;
855     for (; p < size; ++p) {
856       if (idx >= owners[p] && idx < owners[p+1]) {
857         rrows[r].rank  = p;
858         rrows[r].index = rows[r] - owners[p];
859         found = PETSC_TRUE;
860         break;
861       }
862     }
863     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
864   }
865   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
866   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
867   /* Collect flags for rows to be zeroed */
868   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
869   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
870   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
871   /* Compress and put in row numbers */
872   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
873   /* zero diagonal part of matrix */
874   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
875   /* handle off diagonal part of matrix */
876   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
877   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
878   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
879   for (i=0; i<len; i++) bb[lrows[i]] = 1;
880   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
881   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
882   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
883   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
884   if (x) {
885     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
886     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
887     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
888     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
889   }
890   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
891   /* remove zeroed rows of off diagonal matrix */
892   ii = aij->i;
893   for (i=0; i<len; i++) {
894     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
895   }
896   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
897   if (aij->compressedrow.use) {
898     m    = aij->compressedrow.nrows;
899     ii   = aij->compressedrow.i;
900     ridx = aij->compressedrow.rindex;
901     for (i=0; i<m; i++) {
902       n  = ii[i+1] - ii[i];
903       aj = aij->j + ii[i];
904       aa = aij->a + ii[i];
905 
906       for (j=0; j<n; j++) {
907         if (PetscAbsScalar(mask[*aj])) {
908           if (b) bb[*ridx] -= *aa*xx[*aj];
909           *aa = 0.0;
910         }
911         aa++;
912         aj++;
913       }
914       ridx++;
915     }
916   } else { /* do not use compressed row format */
917     m = l->B->rmap->n;
918     for (i=0; i<m; i++) {
919       n  = ii[i+1] - ii[i];
920       aj = aij->j + ii[i];
921       aa = aij->a + ii[i];
922       for (j=0; j<n; j++) {
923         if (PetscAbsScalar(mask[*aj])) {
924           if (b) bb[i] -= *aa*xx[*aj];
925           *aa = 0.0;
926         }
927         aa++;
928         aj++;
929       }
930     }
931   }
932   if (x) {
933     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
934     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
935   }
936   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
937   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
938   ierr = PetscFree(lrows);CHKERRQ(ierr);
939 
940   /* only change matrix nonzero state if pattern was allowed to be changed */
941   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
942     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
943     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
944   }
945   PetscFunctionReturn(0);
946 }
947 
948 #undef __FUNCT__
949 #define __FUNCT__ "MatMult_MPIAIJ"
950 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
951 {
952   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
953   PetscErrorCode ierr;
954   PetscInt       nt;
955 
956   PetscFunctionBegin;
957   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
958   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
959   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
960   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
961   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
962   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
963   PetscFunctionReturn(0);
964 }
965 
966 #undef __FUNCT__
967 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
968 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
969 {
970   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
971   PetscErrorCode ierr;
972 
973   PetscFunctionBegin;
974   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
975   PetscFunctionReturn(0);
976 }
977 
978 #undef __FUNCT__
979 #define __FUNCT__ "MatMultAdd_MPIAIJ"
980 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
981 {
982   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
983   PetscErrorCode ierr;
984 
985   PetscFunctionBegin;
986   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
987   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
988   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
989   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
990   PetscFunctionReturn(0);
991 }
992 
993 #undef __FUNCT__
994 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
995 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
996 {
997   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
998   PetscErrorCode ierr;
999   PetscBool      merged;
1000 
1001   PetscFunctionBegin;
1002   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1003   /* do nondiagonal part */
1004   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1005   if (!merged) {
1006     /* send it on its way */
1007     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1008     /* do local part */
1009     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1010     /* receive remote parts: note this assumes the values are not actually */
1011     /* added into yy until the next line */
1012     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1013   } else {
1014     /* do local part */
1015     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1016     /* send it on its way */
1017     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1018     /* values actually were received in the Begin() but we need to call this nop */
1019     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1020   }
1021   PetscFunctionReturn(0);
1022 }
1023 
1024 #undef __FUNCT__
1025 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1026 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1027 {
1028   MPI_Comm       comm;
1029   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1030   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1031   IS             Me,Notme;
1032   PetscErrorCode ierr;
1033   PetscInt       M,N,first,last,*notme,i;
1034   PetscMPIInt    size;
1035 
1036   PetscFunctionBegin;
1037   /* Easy test: symmetric diagonal block */
1038   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1039   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1040   if (!*f) PetscFunctionReturn(0);
1041   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1042   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1043   if (size == 1) PetscFunctionReturn(0);
1044 
1045   /* Hard test: off-diagonal block. This requires a call to MatGetSubMatrices(). */
1046   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1047   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1048   ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
1049   for (i=0; i<first; i++) notme[i] = i;
1050   for (i=last; i<M; i++) notme[i-last+first] = i;
1051   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1052   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1053   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1054   Aoff = Aoffs[0];
1055   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1056   Boff = Boffs[0];
1057   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1058   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1059   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1060   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1061   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1062   ierr = PetscFree(notme);CHKERRQ(ierr);
1063   PetscFunctionReturn(0);
1064 }
1065 
1066 #undef __FUNCT__
1067 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1068 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1069 {
1070   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1071   PetscErrorCode ierr;
1072 
1073   PetscFunctionBegin;
1074   /* do nondiagonal part */
1075   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1076   /* send it on its way */
1077   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1078   /* do local part */
1079   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1080   /* receive remote parts */
1081   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1082   PetscFunctionReturn(0);
1083 }
1084 
1085 /*
1086   This only works correctly for square matrices where the subblock A->A is the
1087    diagonal block
1088 */
1089 #undef __FUNCT__
1090 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1091 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1092 {
1093   PetscErrorCode ierr;
1094   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1095 
1096   PetscFunctionBegin;
1097   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1098   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1099   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 #undef __FUNCT__
1104 #define __FUNCT__ "MatScale_MPIAIJ"
1105 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1106 {
1107   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1108   PetscErrorCode ierr;
1109 
1110   PetscFunctionBegin;
1111   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1112   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1113   PetscFunctionReturn(0);
1114 }
1115 
1116 #undef __FUNCT__
1117 #define __FUNCT__ "MatDestroy_MPIAIJ"
1118 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1119 {
1120   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1121   PetscErrorCode ierr;
1122 
1123   PetscFunctionBegin;
1124 #if defined(PETSC_USE_LOG)
1125   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1126 #endif
1127   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1128   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1129   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1130   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1131 #if defined(PETSC_USE_CTABLE)
1132   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1133 #else
1134   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1135 #endif
1136   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1137   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1138   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1139   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1140   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1141   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1142 
1143   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1144   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1145   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1146   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1147   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1148   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1149   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1150   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1151   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1152   PetscFunctionReturn(0);
1153 }
1154 
1155 #undef __FUNCT__
1156 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1157 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1158 {
1159   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1160   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1161   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1162   PetscErrorCode ierr;
1163   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1164   int            fd;
1165   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1166   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1167   PetscScalar    *column_values;
1168   PetscInt       message_count,flowcontrolcount;
1169   FILE           *file;
1170 
1171   PetscFunctionBegin;
1172   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1173   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1174   nz   = A->nz + B->nz;
1175   if (!rank) {
1176     header[0] = MAT_FILE_CLASSID;
1177     header[1] = mat->rmap->N;
1178     header[2] = mat->cmap->N;
1179 
1180     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1181     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1182     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1183     /* get largest number of rows any processor has */
1184     rlen  = mat->rmap->n;
1185     range = mat->rmap->range;
1186     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1187   } else {
1188     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1189     rlen = mat->rmap->n;
1190   }
1191 
1192   /* load up the local row counts */
1193   ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
1194   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1195 
1196   /* store the row lengths to the file */
1197   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1198   if (!rank) {
1199     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1200     for (i=1; i<size; i++) {
1201       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1202       rlen = range[i+1] - range[i];
1203       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1204       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1205     }
1206     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1207   } else {
1208     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1209     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1210     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1211   }
1212   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1213 
1214   /* load up the local column indices */
1215   nzmax = nz; /* each process needs at least its own nz of space; the MPI_Reduce below gives the root the largest nz over all processes */
1216   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1217   ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
1218   cnt   = 0;
1219   for (i=0; i<mat->rmap->n; i++) {
1220     for (j=B->i[i]; j<B->i[i+1]; j++) {
1221       if ((col = garray[B->j[j]]) > cstart) break;
1222       column_indices[cnt++] = col;
1223     }
1224     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1225     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1226   }
1227   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1228 
1229   /* store the column indices to the file */
1230   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1231   if (!rank) {
1232     MPI_Status status;
1233     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1234     for (i=1; i<size; i++) {
1235       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1236       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1237       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1238       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1239       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1240     }
1241     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1242   } else {
1243     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1244     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1245     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1246     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1247   }
1248   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1249 
1250   /* load up the local column values */
1251   ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
1252   cnt  = 0;
1253   for (i=0; i<mat->rmap->n; i++) {
1254     for (j=B->i[i]; j<B->i[i+1]; j++) {
1255       if (garray[B->j[j]] > cstart) break;
1256       column_values[cnt++] = B->a[j];
1257     }
1258     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1259     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1260   }
1261   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1262 
1263   /* store the column values to the file */
1264   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1265   if (!rank) {
1266     MPI_Status status;
1267     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1268     for (i=1; i<size; i++) {
1269       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1270       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1271       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1272       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1273       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1274     }
1275     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1276   } else {
1277     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1278     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1279     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1280     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1281   }
1282   ierr = PetscFree(column_values);CHKERRQ(ierr);
1283 
1284   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1285   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1286   PetscFunctionReturn(0);
1287 }
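
/*
   Illustrative usage sketch (added commentary, not part of this revision): the
   binary path above is reached through MatView() with a PETSCVIEWERBINARY
   viewer.  The hypothetical helper below assumes an assembled MATMPIAIJ matrix
   and an example file name "matrix.dat".
*/
PETSC_UNUSED static PetscErrorCode ExampleDumpMPIAIJToBinary(Mat A)
{
  PetscErrorCode ierr;
  PetscViewer    viewer;

  PetscFunctionBegin;
  /* collective open of the binary file; every rank of A's communicator participates */
  ierr = PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
  /* dispatches to MatView_MPIAIJ_Binary() when A is a parallel AIJ matrix */
  ierr = MatView(A,viewer);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}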
1288 
1289 #include <petscdraw.h>
1290 #undef __FUNCT__
1291 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1292 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1293 {
1294   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1295   PetscErrorCode    ierr;
1296   PetscMPIInt       rank = aij->rank,size = aij->size;
1297   PetscBool         isdraw,iascii,isbinary;
1298   PetscViewer       sviewer;
1299   PetscViewerFormat format;
1300 
1301   PetscFunctionBegin;
1302   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1303   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1304   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1305   if (iascii) {
1306     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1307     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1308       MatInfo   info;
1309       PetscInt  *inodes;
1310 
1311       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1312       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1313       ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr);
1314       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1315       if (!inodes) {
1316         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1317                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1318       } else {
1319         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1320                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1321       }
1322       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1323       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1324       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1325       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1326       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1327       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1328       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1329       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1330       PetscFunctionReturn(0);
1331     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1332       PetscInt inodecount,inodelimit,*inodes;
1333       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1334       if (inodes) {
1335         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1336       } else {
1337         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1338       }
1339       PetscFunctionReturn(0);
1340     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1341       PetscFunctionReturn(0);
1342     }
1343   } else if (isbinary) {
1344     if (size == 1) {
1345       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1346       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1347     } else {
1348       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1349     }
1350     PetscFunctionReturn(0);
1351   } else if (isdraw) {
1352     PetscDraw draw;
1353     PetscBool isnull;
1354     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1355     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1356   }
1357 
1358   {
1359     /* assemble the entire matrix onto first processor. */
1360     Mat        A;
1361     Mat_SeqAIJ *Aloc;
1362     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1363     MatScalar  *a;
1364 
1365     if (mat->rmap->N > 1024) {
1366       PetscBool flg = PETSC_FALSE;
1367 
1368       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr);
1369       if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1370     }
1371 
1372     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1373     if (!rank) {
1374       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1375     } else {
1376       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1377     }
1378     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1379     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1380     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1381     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1382     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1383 
1384     /* copy over the A part */
1385     Aloc = (Mat_SeqAIJ*)aij->A->data;
1386     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1387     row  = mat->rmap->rstart;
1388     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1389     for (i=0; i<m; i++) {
1390       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1391       row++;
1392       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1393     }
1394     aj = Aloc->j;
1395     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1396 
1397     /* copy over the B part */
1398     Aloc = (Mat_SeqAIJ*)aij->B->data;
1399     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1400     row  = mat->rmap->rstart;
1401     ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr);
1402     ct   = cols;
1403     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1404     for (i=0; i<m; i++) {
1405       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1406       row++;
1407       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1408     }
1409     ierr = PetscFree(ct);CHKERRQ(ierr);
1410     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1411     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1412     /*
1413        Everyone has to call to draw the matrix since the graphics waits are
1414        synchronized across all processors that share the PetscDraw object
1415     */
1416     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1417     if (!rank) {
1418       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1419     }
1420     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1421     ierr = MatDestroy(&A);CHKERRQ(ierr);
1422   }
1423   PetscFunctionReturn(0);
1424 }
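
/*
   Illustrative usage sketch (added commentary, not part of this revision):
   requesting the detailed ASCII report produced above.  The hypothetical
   helper assumes an assembled MATMPIAIJ matrix; a full ASCII dump of a matrix
   with more than 1024 rows additionally requires -mat_ascii_output_large, as
   enforced above.
*/
PETSC_UNUSED static PetscErrorCode ExampleViewMPIAIJInfoDetail(Mat A)
{
  PetscErrorCode ierr;
  PetscViewer    viewer;

  PetscFunctionBegin;
  viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)A));
  ierr   = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
  ierr   = MatView(A,viewer);CHKERRQ(ierr);  /* per-rank nonzero counts plus VecScatter information */
  ierr   = PetscViewerPopFormat(viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}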
1425 
1426 #undef __FUNCT__
1427 #define __FUNCT__ "MatView_MPIAIJ"
1428 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1429 {
1430   PetscErrorCode ierr;
1431   PetscBool      iascii,isdraw,issocket,isbinary;
1432 
1433   PetscFunctionBegin;
1434   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1435   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1436   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1437   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1438   if (iascii || isdraw || isbinary || issocket) {
1439     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1440   }
1441   PetscFunctionReturn(0);
1442 }
1443 
1444 #undef __FUNCT__
1445 #define __FUNCT__ "MatSOR_MPIAIJ"
1446 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1447 {
1448   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1449   PetscErrorCode ierr;
1450   Vec            bb1 = 0;
1451   PetscBool      hasop;
1452 
1453   PetscFunctionBegin;
1454   if (flag == SOR_APPLY_UPPER) {
1455     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1456     PetscFunctionReturn(0);
1457   }
1458 
1459   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1460     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1461   }
1462 
1463   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1464     if (flag & SOR_ZERO_INITIAL_GUESS) {
1465       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1466       its--;
1467     }
1468 
1469     while (its--) {
1470       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1471       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1472 
1473       /* update rhs: bb1 = bb - B*x */
1474       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1475       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1476 
1477       /* local sweep */
1478       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1479     }
1480   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1481     if (flag & SOR_ZERO_INITIAL_GUESS) {
1482       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1483       its--;
1484     }
1485     while (its--) {
1486       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1487       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1488 
1489       /* update rhs: bb1 = bb - B*x */
1490       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1491       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1492 
1493       /* local sweep */
1494       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1495     }
1496   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1497     if (flag & SOR_ZERO_INITIAL_GUESS) {
1498       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1499       its--;
1500     }
1501     while (its--) {
1502       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1503       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1504 
1505       /* update rhs: bb1 = bb - B*x */
1506       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1507       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1508 
1509       /* local sweep */
1510       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1511     }
1512   } else if (flag & SOR_EISENSTAT) {
1513     Vec xx1;
1514 
1515     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1516     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1517 
1518     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1519     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1520     if (!mat->diag) {
1521       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1522       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1523     }
1524     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1525     if (hasop) {
1526       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1527     } else {
1528       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1529     }
1530     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1531 
1532     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1533 
1534     /* local sweep */
1535     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1536     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1537     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1538   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1539 
1540   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1541   PetscFunctionReturn(0);
1542 }
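
/*
   Illustrative usage sketch (added commentary, not part of this revision): the
   kernel above is normally reached through MatSOR().  A process-local
   symmetric sweep with a zero initial guess, as used by PCSOR, looks like the
   hypothetical helper below; b and x are assumed compatible with the MATMPIAIJ
   matrix A.
*/
PETSC_UNUSED static PetscErrorCode ExampleLocalSymmetricSOR(Mat A,Vec b,Vec x)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* omega = 1.0, no diagonal shift, 2 outer iterations, 1 local iteration per sweep */
  ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,2,1,x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}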
1543 
1544 #undef __FUNCT__
1545 #define __FUNCT__ "MatPermute_MPIAIJ"
1546 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1547 {
1548   Mat            aA,aB,Aperm;
1549   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1550   PetscScalar    *aa,*ba;
1551   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1552   PetscSF        rowsf,sf;
1553   IS             parcolp = NULL;
1554   PetscBool      done;
1555   PetscErrorCode ierr;
1556 
1557   PetscFunctionBegin;
1558   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1559   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1560   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1561   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1562 
1563   /* Invert row permutation to find out where my rows should go */
1564   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1565   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1566   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1567   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1568   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1569   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1570 
1571   /* Invert column permutation to find out where my columns should go */
1572   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1573   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1574   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1575   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1576   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1577   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1578   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1579 
1580   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1581   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1582   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1583 
1584   /* Find out where my gcols should go */
1585   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1586   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1587   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1588   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1589   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1590   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1591   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1592   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1593 
1594   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1595   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1596   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1597   for (i=0; i<m; i++) {
1598     PetscInt row = rdest[i],rowner;
1599     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1600     for (j=ai[i]; j<ai[i+1]; j++) {
1601       PetscInt cowner,col = cdest[aj[j]];
1602       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1603       if (rowner == cowner) dnnz[i]++;
1604       else onnz[i]++;
1605     }
1606     for (j=bi[i]; j<bi[i+1]; j++) {
1607       PetscInt cowner,col = gcdest[bj[j]];
1608       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1609       if (rowner == cowner) dnnz[i]++;
1610       else onnz[i]++;
1611     }
1612   }
1613   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1614   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1615   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1616   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1617   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1618 
1619   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1620   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1621   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1622   for (i=0; i<m; i++) {
1623     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1624     PetscInt j0,rowlen;
1625     rowlen = ai[i+1] - ai[i];
1626     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1627       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1628       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1629     }
1630     rowlen = bi[i+1] - bi[i];
1631     for (j0=j=0; j<rowlen; j0=j) {
1632       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1633       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1634     }
1635   }
1636   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1637   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1638   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1639   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1640   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1641   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1642   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1643   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1644   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1645   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1646   *B = Aperm;
1647   PetscFunctionReturn(0);
1648 }
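
/*
   Illustrative usage sketch (added commentary, not part of this revision):
   MatPermute() drives the routine above.  The hypothetical helper builds a
   trivial identity permutation with ISCreateStride() over each rank's
   ownership range just to show the calling sequence; rowp and colp may be
   arbitrary parallel permutations.
*/
PETSC_UNUSED static PetscErrorCode ExamplePermuteIdentity(Mat A,Mat *Aperm)
{
  PetscErrorCode ierr;
  PetscInt       rstart,rend,cstart,cend;
  IS             rowp,colp;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetOwnershipRangeColumn(A,&cstart,&cend);CHKERRQ(ierr);
  ierr = ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&rowp);CHKERRQ(ierr);
  ierr = ISCreateStride(PetscObjectComm((PetscObject)A),cend-cstart,cstart,1,&colp);CHKERRQ(ierr);
  ierr = MatPermute(A,rowp,colp,Aperm);CHKERRQ(ierr);
  ierr = ISDestroy(&rowp);CHKERRQ(ierr);
  ierr = ISDestroy(&colp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}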
1649 
1650 #undef __FUNCT__
1651 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1652 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1653 {
1654   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1655   Mat            A    = mat->A,B = mat->B;
1656   PetscErrorCode ierr;
1657   PetscReal      isend[5],irecv[5];
1658 
1659   PetscFunctionBegin;
1660   info->block_size = 1.0;
1661   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1662 
1663   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1664   isend[3] = info->memory;  isend[4] = info->mallocs;
1665 
1666   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1667 
1668   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1669   isend[3] += info->memory;  isend[4] += info->mallocs;
1670   if (flag == MAT_LOCAL) {
1671     info->nz_used      = isend[0];
1672     info->nz_allocated = isend[1];
1673     info->nz_unneeded  = isend[2];
1674     info->memory       = isend[3];
1675     info->mallocs      = isend[4];
1676   } else if (flag == MAT_GLOBAL_MAX) {
1677     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1678 
1679     info->nz_used      = irecv[0];
1680     info->nz_allocated = irecv[1];
1681     info->nz_unneeded  = irecv[2];
1682     info->memory       = irecv[3];
1683     info->mallocs      = irecv[4];
1684   } else if (flag == MAT_GLOBAL_SUM) {
1685     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1686 
1687     info->nz_used      = irecv[0];
1688     info->nz_allocated = irecv[1];
1689     info->nz_unneeded  = irecv[2];
1690     info->memory       = irecv[3];
1691     info->mallocs      = irecv[4];
1692   }
1693   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1694   info->fill_ratio_needed = 0;
1695   info->factor_mallocs    = 0;
1696   PetscFunctionReturn(0);
1697 }
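
/*
   Illustrative usage sketch (added commentary, not part of this revision):
   querying the statistics aggregated above through the public MatGetInfo()
   interface; the MatInfo fields are PetscLogDouble, hence the %g format.
*/
PETSC_UNUSED static PetscErrorCode ExampleReportGlobalNonzeros(Mat A)
{
  PetscErrorCode ierr;
  MatInfo        info;

  PetscFunctionBegin;
  ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
  ierr = PetscPrintf(PetscObjectComm((PetscObject)A),"nz used %g, nz allocated %g, mallocs %g\n",
                     (double)info.nz_used,(double)info.nz_allocated,(double)info.mallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}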
1698 
1699 #undef __FUNCT__
1700 #define __FUNCT__ "MatSetOption_MPIAIJ"
1701 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1702 {
1703   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1704   PetscErrorCode ierr;
1705 
1706   PetscFunctionBegin;
1707   switch (op) {
1708   case MAT_NEW_NONZERO_LOCATIONS:
1709   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1710   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1711   case MAT_KEEP_NONZERO_PATTERN:
1712   case MAT_NEW_NONZERO_LOCATION_ERR:
1713   case MAT_USE_INODES:
1714   case MAT_IGNORE_ZERO_ENTRIES:
1715     MatCheckPreallocated(A,1);
1716     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1717     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1718     break;
1719   case MAT_ROW_ORIENTED:
1720     a->roworiented = flg;
1721 
1722     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1723     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1724     break;
1725   case MAT_NEW_DIAGONALS:
1726     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1727     break;
1728   case MAT_IGNORE_OFF_PROC_ENTRIES:
1729     a->donotstash = flg;
1730     break;
1731   case MAT_SPD:
1732     A->spd_set = PETSC_TRUE;
1733     A->spd     = flg;
1734     if (flg) {
1735       A->symmetric                  = PETSC_TRUE;
1736       A->structurally_symmetric     = PETSC_TRUE;
1737       A->symmetric_set              = PETSC_TRUE;
1738       A->structurally_symmetric_set = PETSC_TRUE;
1739     }
1740     break;
1741   case MAT_SYMMETRIC:
1742     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1743     break;
1744   case MAT_STRUCTURALLY_SYMMETRIC:
1745     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1746     break;
1747   case MAT_HERMITIAN:
1748     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1749     break;
1750   case MAT_SYMMETRY_ETERNAL:
1751     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1752     break;
1753   default:
1754     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1755   }
1756   PetscFunctionReturn(0);
1757 }
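
/*
   Illustrative usage sketch (added commentary, not part of this revision): the
   options handled above are set through MatSetOption().  Note that MAT_SPD
   also marks the matrix as (structurally) symmetric, as coded above.
*/
PETSC_UNUSED static PetscErrorCode ExampleDeclareProperties(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetOption(A,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);
  /* drop any entries a rank generates for rows it does not own during assembly */
  ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}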
1758 
1759 #undef __FUNCT__
1760 #define __FUNCT__ "MatGetRow_MPIAIJ"
1761 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1762 {
1763   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1764   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1765   PetscErrorCode ierr;
1766   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1767   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1768   PetscInt       *cmap,*idx_p;
1769 
1770   PetscFunctionBegin;
1771   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1772   mat->getrowactive = PETSC_TRUE;
1773 
1774   if (!mat->rowvalues && (idx || v)) {
1775     /*
1776         allocate enough space to hold information from the longest row.
1777     */
1778     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1779     PetscInt   max = 1,tmp;
1780     for (i=0; i<matin->rmap->n; i++) {
1781       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1782       if (max < tmp) max = tmp;
1783     }
1784     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1785   }
1786 
1787   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1788   lrow = row - rstart;
1789 
1790   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1791   if (!v)   {pvA = 0; pvB = 0;}
1792   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1793   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1794   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1795   nztot = nzA + nzB;
1796 
1797   cmap = mat->garray;
1798   if (v  || idx) {
1799     if (nztot) {
1800       /* Sort by increasing column numbers, assuming A and B already sorted */
1801       PetscInt imark = -1;
1802       if (v) {
1803         *v = v_p = mat->rowvalues;
1804         for (i=0; i<nzB; i++) {
1805           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1806           else break;
1807         }
1808         imark = i;
1809         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1810         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1811       }
1812       if (idx) {
1813         *idx = idx_p = mat->rowindices;
1814         if (imark > -1) {
1815           for (i=0; i<imark; i++) {
1816             idx_p[i] = cmap[cworkB[i]];
1817           }
1818         } else {
1819           for (i=0; i<nzB; i++) {
1820             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1821             else break;
1822           }
1823           imark = i;
1824         }
1825         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1826         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1827       }
1828     } else {
1829       if (idx) *idx = 0;
1830       if (v)   *v   = 0;
1831     }
1832   }
1833   *nz  = nztot;
1834   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1835   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1836   PetscFunctionReturn(0);
1837 }
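
/*
   Illustrative usage sketch (added commentary, not part of this revision):
   MatGetRow() on a MATMPIAIJ matrix returns, as assembled above, one row with
   global column indices merged in increasing order from the diagonal and
   off-diagonal blocks; only locally owned rows may be requested.  The helper
   name below is hypothetical.
*/
PETSC_UNUSED static PetscErrorCode ExampleSumOwnedRows(Mat A)
{
  PetscErrorCode    ierr;
  PetscInt          rstart,rend,row,ncols,j;
  const PetscInt    *cols;
  const PetscScalar *vals;
  PetscScalar       rowsum;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ierr   = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
    rowsum = 0.0;
    for (j=0; j<ncols; j++) rowsum += vals[j];
    ierr = PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A),"row %D sum %g\n",row,(double)PetscRealPart(rowsum));CHKERRQ(ierr);
    ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  }
  ierr = PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}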
1838 
1839 #undef __FUNCT__
1840 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1841 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1842 {
1843   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1844 
1845   PetscFunctionBegin;
1846   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1847   aij->getrowactive = PETSC_FALSE;
1848   PetscFunctionReturn(0);
1849 }
1850 
1851 #undef __FUNCT__
1852 #define __FUNCT__ "MatNorm_MPIAIJ"
1853 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1854 {
1855   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1856   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1857   PetscErrorCode ierr;
1858   PetscInt       i,j,cstart = mat->cmap->rstart;
1859   PetscReal      sum = 0.0;
1860   MatScalar      *v;
1861 
1862   PetscFunctionBegin;
1863   if (aij->size == 1) {
1864     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1865   } else {
1866     if (type == NORM_FROBENIUS) {
1867       v = amat->a;
1868       for (i=0; i<amat->nz; i++) {
1869         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1870       }
1871       v = bmat->a;
1872       for (i=0; i<bmat->nz; i++) {
1873         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1874       }
1875       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1876       *norm = PetscSqrtReal(*norm);
1877     } else if (type == NORM_1) { /* max column norm */
1878       PetscReal *tmp,*tmp2;
1879       PetscInt  *jj,*garray = aij->garray;
1880       ierr  = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr);
1881       ierr  = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr);
1882       *norm = 0.0;
1883       v     = amat->a; jj = amat->j;
1884       for (j=0; j<amat->nz; j++) {
1885         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1886       }
1887       v = bmat->a; jj = bmat->j;
1888       for (j=0; j<bmat->nz; j++) {
1889         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1890       }
1891       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1892       for (j=0; j<mat->cmap->N; j++) {
1893         if (tmp2[j] > *norm) *norm = tmp2[j];
1894       }
1895       ierr = PetscFree(tmp);CHKERRQ(ierr);
1896       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1897     } else if (type == NORM_INFINITY) { /* max row norm */
1898       PetscReal ntemp = 0.0;
1899       for (j=0; j<aij->A->rmap->n; j++) {
1900         v   = amat->a + amat->i[j];
1901         sum = 0.0;
1902         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1903           sum += PetscAbsScalar(*v); v++;
1904         }
1905         v = bmat->a + bmat->i[j];
1906         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1907           sum += PetscAbsScalar(*v); v++;
1908         }
1909         if (sum > ntemp) ntemp = sum;
1910       }
1911       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1912     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for the two norm");
1913   }
1914   PetscFunctionReturn(0);
1915 }
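
/*
   Illustrative usage sketch (added commentary, not part of this revision): the
   three norms supported above, requested through MatNorm(); NORM_2 is rejected
   by the routine and is therefore not attempted.
*/
PETSC_UNUSED static PetscErrorCode ExampleMatrixNorms(Mat A)
{
  PetscErrorCode ierr;
  PetscReal      fro,one,inf;

  PetscFunctionBegin;
  ierr = MatNorm(A,NORM_FROBENIUS,&fro);CHKERRQ(ierr); /* sqrt of the global sum of |a_ij|^2 */
  ierr = MatNorm(A,NORM_1,&one);CHKERRQ(ierr);         /* maximum column sum */
  ierr = MatNorm(A,NORM_INFINITY,&inf);CHKERRQ(ierr);  /* maximum row sum */
  ierr = PetscPrintf(PetscObjectComm((PetscObject)A),"Frobenius %g  one-norm %g  infinity-norm %g\n",(double)fro,(double)one,(double)inf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}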
1916 
1917 #undef __FUNCT__
1918 #define __FUNCT__ "MatTranspose_MPIAIJ"
1919 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1920 {
1921   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1922   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1923   PetscErrorCode ierr;
1924   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1925   PetscInt       cstart = A->cmap->rstart,ncol;
1926   Mat            B;
1927   MatScalar      *array;
1928 
1929   PetscFunctionBegin;
1930   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1931 
1932   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1933   ai = Aloc->i; aj = Aloc->j;
1934   bi = Bloc->i; bj = Bloc->j;
1935   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1936     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1937     PetscSFNode          *oloc;
1938     PETSC_UNUSED PetscSF sf;
1939 
1940     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1941     /* compute d_nnz for preallocation */
1942     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1943     for (i=0; i<ai[ma]; i++) {
1944       d_nnz[aj[i]]++;
1945       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1946     }
1947     /* compute local off-diagonal contributions */
1948     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1949     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1950     /* map those to global */
1951     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1952     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1953     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1954     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1955     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1956     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1957     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1958 
1959     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1960     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1961     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1962     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1963     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1964     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1965   } else {
1966     B    = *matout;
1967     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1968     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1969   }
1970 
1971   /* copy over the A part */
1972   array = Aloc->a;
1973   row   = A->rmap->rstart;
1974   for (i=0; i<ma; i++) {
1975     ncol = ai[i+1]-ai[i];
1976     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1977     row++;
1978     array += ncol; aj += ncol;
1979   }
1980   aj = Aloc->j;
1981   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
1982 
1983   /* copy over the B part */
1984   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
1985   array = Bloc->a;
1986   row   = A->rmap->rstart;
1987   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1988   cols_tmp = cols;
1989   for (i=0; i<mb; i++) {
1990     ncol = bi[i+1]-bi[i];
1991     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1992     row++;
1993     array += ncol; cols_tmp += ncol;
1994   }
1995   ierr = PetscFree(cols);CHKERRQ(ierr);
1996 
1997   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1998   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1999   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2000     *matout = B;
2001   } else {
2002     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2003   }
2004   PetscFunctionReturn(0);
2005 }
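
/*
   Illustrative usage sketch (added commentary, not part of this revision):
   out-of-place and reused transposes through the public MatTranspose()
   interface, which reaches the routine above for MATMPIAIJ.
*/
PETSC_UNUSED static PetscErrorCode ExampleTransposeTwice(Mat A)
{
  PetscErrorCode ierr;
  Mat            At;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); /* allocates At with the transposed preallocation */
  /* ... change numerical values of A without altering its nonzero pattern ... */
  ierr = MatTranspose(A,MAT_REUSE_MATRIX,&At);CHKERRQ(ierr);   /* refills the existing At */
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}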
2006 
2007 #undef __FUNCT__
2008 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2009 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2010 {
2011   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2012   Mat            a    = aij->A,b = aij->B;
2013   PetscErrorCode ierr;
2014   PetscInt       s1,s2,s3;
2015 
2016   PetscFunctionBegin;
2017   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2018   if (rr) {
2019     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2020     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2021     /* Overlap communication with computation. */
2022     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2023   }
2024   if (ll) {
2025     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2026     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2027     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2028   }
2029   /* scale  the diagonal block */
2030   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2031 
2032   if (rr) {
2033     /* Do a scatter end and then right scale the off-diagonal block */
2034     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2035     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2036   }
2037   PetscFunctionReturn(0);
2038 }
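
/*
   Illustrative usage sketch (added commentary, not part of this revision): row
   scaling A <- diag(l) A through MatDiagonalScale(), which reaches the routine
   above; passing NULL for the right vector skips the column scaling, as the
   code above allows.
*/
PETSC_UNUSED static PetscErrorCode ExampleRowScaleByInverseDiagonal(Mat A)
{
  PetscErrorCode ierr;
  Vec            l;

  PetscFunctionBegin;
  ierr = MatGetVecs(A,NULL,&l);CHKERRQ(ierr);     /* vector conforming to the row layout of A */
  ierr = MatGetDiagonal(A,l);CHKERRQ(ierr);
  ierr = VecReciprocal(l);CHKERRQ(ierr);          /* assumes no zero diagonal entries */
  ierr = MatDiagonalScale(A,l,NULL);CHKERRQ(ierr);
  ierr = VecDestroy(&l);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}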
2039 
2040 #undef __FUNCT__
2041 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2042 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2043 {
2044   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2045   PetscErrorCode ierr;
2046 
2047   PetscFunctionBegin;
2048   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2049   PetscFunctionReturn(0);
2050 }
2051 
2052 #undef __FUNCT__
2053 #define __FUNCT__ "MatEqual_MPIAIJ"
2054 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2055 {
2056   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2057   Mat            a,b,c,d;
2058   PetscBool      flg;
2059   PetscErrorCode ierr;
2060 
2061   PetscFunctionBegin;
2062   a = matA->A; b = matA->B;
2063   c = matB->A; d = matB->B;
2064 
2065   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2066   if (flg) {
2067     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2068   }
2069   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2070   PetscFunctionReturn(0);
2071 }
2072 
2073 #undef __FUNCT__
2074 #define __FUNCT__ "MatCopy_MPIAIJ"
2075 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2076 {
2077   PetscErrorCode ierr;
2078   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2079   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2080 
2081   PetscFunctionBegin;
2082   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2083   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2084     /* Because of the column compression in the off-processor part of the matrix a->B,
2085        the number of columns in a->B and b->B may differ, so MatCopy() cannot be called
2086        directly on the two parts. If needed, a copy more efficient than MatCopy_Basic()
2087        could be provided by first uncompressing the a->B matrices and then copying the
2088        submatrices */
2089     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2090   } else {
2091     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2092     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2093   }
2094   PetscFunctionReturn(0);
2095 }
2096 
2097 #undef __FUNCT__
2098 #define __FUNCT__ "MatSetUp_MPIAIJ"
2099 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2100 {
2101   PetscErrorCode ierr;
2102 
2103   PetscFunctionBegin;
2104   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2105   PetscFunctionReturn(0);
2106 }
2107 
2108 #undef __FUNCT__
2109 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2110 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2111 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2112 {
2113   PetscInt       i,m=Y->rmap->N;
2114   Mat_SeqAIJ     *x  = (Mat_SeqAIJ*)X->data;
2115   Mat_SeqAIJ     *y  = (Mat_SeqAIJ*)Y->data;
2116   const PetscInt *xi = x->i,*yi = y->i;
2117 
2118   PetscFunctionBegin;
2119   /* Set the number of nonzeros in the new matrix */
2120   for (i=0; i<m; i++) {
2121     PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2122     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2123     nnz[i] = 0;
2124     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2125       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2126       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2127       nnz[i]++;
2128     }
2129     for (; k<nzy; k++) nnz[i]++;
2130   }
2131   PetscFunctionReturn(0);
2132 }
2133 
2134 #undef __FUNCT__
2135 #define __FUNCT__ "MatAXPY_MPIAIJ"
2136 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2137 {
2138   PetscErrorCode ierr;
2139   PetscInt       i;
2140   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2141   PetscBLASInt   bnz,one=1;
2142   Mat_SeqAIJ     *x,*y;
2143 
2144   PetscFunctionBegin;
2145   if (str == SAME_NONZERO_PATTERN) {
2146     PetscScalar alpha = a;
2147     x    = (Mat_SeqAIJ*)xx->A->data;
2148     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2149     y    = (Mat_SeqAIJ*)yy->A->data;
2150     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2151     x    = (Mat_SeqAIJ*)xx->B->data;
2152     y    = (Mat_SeqAIJ*)yy->B->data;
2153     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2154     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2155   } else if (str == SUBSET_NONZERO_PATTERN) {
2156     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2157 
2158     x = (Mat_SeqAIJ*)xx->B->data;
2159     y = (Mat_SeqAIJ*)yy->B->data;
2160     if (y->xtoy && y->XtoY != xx->B) {
2161       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2162       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2163     }
2164     if (!y->xtoy) { /* get xtoy */
2165       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2166       y->XtoY = xx->B;
2167       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2168     }
2169     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2170   } else {
2171     Mat      B;
2172     PetscInt *nnz_d,*nnz_o;
2173     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2174     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2175     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2176     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2177     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2178     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2179     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2180     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2181     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2182     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2183     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2184     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2185     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2186     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2187   }
2188   PetscFunctionReturn(0);
2189 }
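
/*
   Illustrative usage sketch (added commentary, not part of this revision): the
   three branches above are selected by the MatStructure argument of MatAXPY().
   SAME_NONZERO_PATTERN uses the direct BLAS axpy path, SUBSET_NONZERO_PATTERN
   maps X entries into Y's existing pattern, and DIFFERENT_NONZERO_PATTERN
   rebuilds Y with the merged preallocation computed by
   MatAXPYGetPreallocation_MPIAIJ().
*/
PETSC_UNUSED static PetscErrorCode ExampleAXPYVariants(Mat Y,Mat X)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* fastest path: X and Y are assumed to share an identical nonzero pattern */
  ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  /* general path: Y is rebuilt over the union of the two patterns */
  ierr = MatAXPY(Y,-1.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}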
2190 
2191 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2192 
2193 #undef __FUNCT__
2194 #define __FUNCT__ "MatConjugate_MPIAIJ"
2195 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2196 {
2197 #if defined(PETSC_USE_COMPLEX)
2198   PetscErrorCode ierr;
2199   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2200 
2201   PetscFunctionBegin;
2202   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2203   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2204 #else
2205   PetscFunctionBegin;
2206 #endif
2207   PetscFunctionReturn(0);
2208 }
2209 
2210 #undef __FUNCT__
2211 #define __FUNCT__ "MatRealPart_MPIAIJ"
2212 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2213 {
2214   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2215   PetscErrorCode ierr;
2216 
2217   PetscFunctionBegin;
2218   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2219   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2220   PetscFunctionReturn(0);
2221 }
2222 
2223 #undef __FUNCT__
2224 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2225 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2226 {
2227   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2228   PetscErrorCode ierr;
2229 
2230   PetscFunctionBegin;
2231   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2232   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2233   PetscFunctionReturn(0);
2234 }
2235 
2236 #if defined(PETSC_HAVE_PBGL)
2237 
2238 #include <boost/parallel/mpi/bsp_process_group.hpp>
2239 #include <boost/graph/distributed/ilu_default_graph.hpp>
2240 #include <boost/graph/distributed/ilu_0_block.hpp>
2241 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2242 #include <boost/graph/distributed/petsc/interface.hpp>
2243 #include <boost/multi_array.hpp>
2244 #include <boost/parallel/distributed_property_map.hpp>
2245 
2246 #undef __FUNCT__
2247 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2248 /*
2249   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2250 */
2251 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2252 {
2253   namespace petsc = boost::distributed::petsc;
2254 
2255   namespace graph_dist = boost::graph::distributed;
2256   using boost::graph::distributed::ilu_default::process_group_type;
2257   using boost::graph::ilu_permuted;
2258 
2259   PetscBool      row_identity, col_identity;
2260   PetscContainer c;
2261   PetscInt       m, n, M, N;
2262   PetscErrorCode ierr;
2263 
2264   PetscFunctionBegin;
2265   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2266   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2267   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2268   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2269 
2270   process_group_type pg;
2271   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2272   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2273   lgraph_type& level_graph = *lgraph_p;
2274   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2275 
2276   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2277   ilu_permuted(level_graph);
2278 
2279   /* put together the new matrix */
2280   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2281   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2282   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2283   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2284   ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
2285   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2286   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2287   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2288 
2289   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr);
2290   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2291   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2292   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2293   PetscFunctionReturn(0);
2294 }
2295 
2296 #undef __FUNCT__
2297 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2298 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2299 {
2300   PetscFunctionBegin;
2301   PetscFunctionReturn(0);
2302 }
2303 
2304 #undef __FUNCT__
2305 #define __FUNCT__ "MatSolve_MPIAIJ"
2306 /*
2307   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2308 */
2309 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2310 {
2311   namespace graph_dist = boost::graph::distributed;
2312 
2313   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2314   lgraph_type    *lgraph_p;
2315   PetscContainer c;
2316   PetscErrorCode ierr;
2317 
2318   PetscFunctionBegin;
2319   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2320   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2321   ierr = VecCopy(b, x);CHKERRQ(ierr);
2322 
2323   PetscScalar *array_x;
2324   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2325   PetscInt sx;
2326   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2327 
2328   PetscScalar *array_b;
2329   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2330   PetscInt sb;
2331   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2332 
2333   lgraph_type& level_graph = *lgraph_p;
2334   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2335 
2336   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2337   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2338   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2339 
2340   typedef boost::iterator_property_map<array_ref_type::iterator,
2341                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2342   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2343   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2344 
2345   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2346   PetscFunctionReturn(0);
2347 }
2348 #endif
2349 
2350 #undef __FUNCT__
2351 #define __FUNCT__ "MatDestroy_MatRedundant"
2352 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2353 {
2354   PetscErrorCode ierr;
2355   Mat_Redundant  *redund;
2356   PetscInt       i;
2357   PetscMPIInt    size;
2358 
2359   PetscFunctionBegin;
2360   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2361   if (size == 1) {
2362     Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2363     redund = a->redundant;
2364   } else {
2365     Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2366     redund = a->redundant;
2367   }
2368   if (redund) {
2369     if (redund->matseq) { /* via MatGetSubMatrices()  */
2370       ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
2371       ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
2372       ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
2373       ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
2374     } else {
2375       ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2376       ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2377       ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2378       for (i=0; i<redund->nrecvs; i++) {
2379         ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2380         ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2381       }
2382       ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2383     }
2384 
2385     if (redund->psubcomm) {
2386       ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
2387     }
2388     ierr = redund->Destroy(A);CHKERRQ(ierr);
2389     ierr = PetscFree(redund);CHKERRQ(ierr);
2390   }
2391   PetscFunctionReturn(0);
2392 }
2393 
2394 #undef __FUNCT__
2395 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced"
2396 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2397 {
2398   PetscMPIInt    rank,size;
2399   MPI_Comm       comm;
2400   PetscErrorCode ierr;
2401   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2402   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2403   PetscInt       *rowrange = mat->rmap->range;
2404   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2405   Mat            A = aij->A,B=aij->B,C=*matredundant;
2406   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2407   PetscScalar    *sbuf_a;
2408   PetscInt       nzlocal=a->nz+b->nz;
2409   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2410   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2411   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2412   MatScalar      *aworkA,*aworkB;
2413   PetscScalar    *vals;
2414   PetscMPIInt    tag1,tag2,tag3,imdex;
2415   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2416   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2417   MPI_Status     recv_status,*send_status;
2418   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2419   PetscInt       **rbuf_j=NULL;
2420   PetscScalar    **rbuf_a=NULL;
2421   Mat_Redundant  *redund =NULL;
2422 
2423   PetscFunctionBegin;
2424   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2425   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2426   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2427   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2428   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2429 
2430   if (reuse == MAT_REUSE_MATRIX) {
2431     if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2432     if (subsize == 1) {
2433       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2434       redund = c->redundant;
2435     } else {
2436       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2437       redund = c->redundant;
2438     }
2439     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2440 
2441     nsends    = redund->nsends;
2442     nrecvs    = redund->nrecvs;
2443     send_rank = redund->send_rank;
2444     recv_rank = redund->recv_rank;
2445     sbuf_nz   = redund->sbuf_nz;
2446     rbuf_nz   = redund->rbuf_nz;
2447     sbuf_j    = redund->sbuf_j;
2448     sbuf_a    = redund->sbuf_a;
2449     rbuf_j    = redund->rbuf_j;
2450     rbuf_a    = redund->rbuf_a;
2451   }
2452 
2453   if (reuse == MAT_INITIAL_MATRIX) {
2454     PetscInt    nleftover,np_subcomm;
2455 
2456     /* determine the destination ranks send_rank, and the counts nsends and nrecvs */
2457     ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr);
2458 
2459     np_subcomm = size/nsubcomm;
2460     nleftover  = size - nsubcomm*np_subcomm;
2461 
2462     /* the block of code below is specific to the INTERLACED ordering */
2463     /* ------------------------------------------------*/
2464     nsends = 0; nrecvs = 0;
2465     for (i=0; i<size; i++) {
2466       if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2467         send_rank[nsends++] = i;
2468         recv_rank[nrecvs++] = i;
2469       }
2470     }
2471     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2472       i = size-nleftover-1;
2473       j = 0;
2474       while (j < nsubcomm - nleftover) {
2475         send_rank[nsends++] = i;
2476         i--; j++;
2477       }
2478     }
2479 
2480     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2481       for (i=0; i<nleftover; i++) {
2482         recv_rank[nrecvs++] = size-nleftover+i;
2483       }
2484     }
2485     /*----------------------------------------------*/
2486 
2487     /* allocate sbuf_j, sbuf_a */
2488     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2489     ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr);
2490     ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr);
2491     /*
2492     ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr);
2493     ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr);
2494      */
2495   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2496 
2497   /* copy mat's local entries into the buffers */
2498   if (reuse == MAT_INITIAL_MATRIX) {
2499     rownz_max = 0;
2500     rptr      = sbuf_j;
2501     cols      = sbuf_j + rend-rstart + 1;
2502     vals      = sbuf_a;
2503     rptr[0]   = 0;
2504     for (i=0; i<rend-rstart; i++) {
2505       row    = i + rstart;
2506       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2507       ncols  = nzA + nzB;
2508       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2509       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2510       /* load the column indices for this row into cols */
2511       lwrite = 0;
2512       for (l=0; l<nzB; l++) {
2513         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2514           vals[lwrite]   = aworkB[l];
2515           cols[lwrite++] = ctmp;
2516         }
2517       }
2518       for (l=0; l<nzA; l++) {
2519         vals[lwrite]   = aworkA[l];
2520         cols[lwrite++] = cstart + cworkA[l];
2521       }
2522       for (l=0; l<nzB; l++) {
2523         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2524           vals[lwrite]   = aworkB[l];
2525           cols[lwrite++] = ctmp;
2526         }
2527       }
2528       vals     += ncols;
2529       cols     += ncols;
2530       rptr[i+1] = rptr[i] + ncols;
2531       if (rownz_max < ncols) rownz_max = ncols;
2532     }
2533     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"rptr[%D] %D != %D + %D",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2534   } else { /* only copy matrix values into sbuf_a */
2535     rptr    = sbuf_j;
2536     vals    = sbuf_a;
2537     rptr[0] = 0;
2538     for (i=0; i<rend-rstart; i++) {
2539       row    = i + rstart;
2540       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2541       ncols  = nzA + nzB;
2542       cworkB = b->j + b->i[i];
2543       aworkA = a->a + a->i[i];
2544       aworkB = b->a + b->i[i];
2545       lwrite = 0;
2546       for (l=0; l<nzB; l++) {
2547         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2548       }
2549       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2550       for (l=0; l<nzB; l++) {
2551         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2552       }
2553       vals     += ncols;
2554       rptr[i+1] = rptr[i] + ncols;
2555     }
2556   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2557 
2558   /* send nzlocal to others, and recv other's nzlocal */
2559   /*--------------------------------------------------*/
2560   if (reuse == MAT_INITIAL_MATRIX) {
2561     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2562 
2563     s_waits2 = s_waits3 + nsends;
2564     s_waits1 = s_waits2 + nsends;
2565     r_waits1 = s_waits1 + nsends;
2566     r_waits2 = r_waits1 + nrecvs;
2567     r_waits3 = r_waits2 + nrecvs;
2568   } else {
2569     ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2570 
2571     r_waits3 = s_waits3 + nsends;
2572   }
2573 
2574   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2575   if (reuse == MAT_INITIAL_MATRIX) {
2576     /* get new tags to keep the communication clean */
2577     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2578     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2579     ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr);
2580 
2581     /* post receives of other's nzlocal */
2582     for (i=0; i<nrecvs; i++) {
2583       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2584     }
2585     /* send nzlocal to others */
2586     for (i=0; i<nsends; i++) {
2587       sbuf_nz[i] = nzlocal;
2588       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2589     }
2590     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2591     count = nrecvs;
2592     while (count) {
2593       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2594 
2595       recv_rank[imdex] = recv_status.MPI_SOURCE;
2596       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2597       ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr);
2598 
2599       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2600 
2601       rbuf_nz[imdex] += i + 2;
2602 
2603       ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr);
2604       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2605       count--;
2606     }
2607     /* wait on sends of nzlocal */
2608     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2609     /* send mat->i,j to others, and recv others' mat->i,j */
2610     /*------------------------------------------------*/
2611     for (i=0; i<nsends; i++) {
2612       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2613       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2614     }
2615     /* wait on receives of mat->i,j */
2616     /*------------------------------*/
2617     count = nrecvs;
2618     while (count) {
2619       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2620       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2621       count--;
2622     }
2623     /* wait on sends of mat->i,j */
2624     /*---------------------------*/
2625     if (nsends) {
2626       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2627     }
2628   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2629 
2630   /* post receives, send and receive mat->a */
2631   /*----------------------------------------*/
2632   for (imdex=0; imdex<nrecvs; imdex++) {
2633     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2634   }
2635   for (i=0; i<nsends; i++) {
2636     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2637   }
2638   count = nrecvs;
2639   while (count) {
2640     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2641     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2642     count--;
2643   }
2644   if (nsends) {
2645     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2646   }
2647 
2648   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2649 
2650   /* create redundant matrix */
2651   /*-------------------------*/
2652   if (reuse == MAT_INITIAL_MATRIX) {
2653     const PetscInt *range;
2654     PetscInt       rstart_sub,rend_sub,mloc_sub;
2655 
2656     /* compute rownz_max for preallocation */
2657     for (imdex=0; imdex<nrecvs; imdex++) {
2658       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2659       rptr = rbuf_j[imdex];
2660       for (i=0; i<j; i++) {
2661         ncols = rptr[i+1] - rptr[i];
2662         if (rownz_max < ncols) rownz_max = ncols;
2663       }
2664     }
2665 
2666     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2667 
2668     /* get local size of redundant matrix
2669        - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED; it works for other types, but may not be efficient! */
2670     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2671     rstart_sub = range[nsubcomm*subrank];
2672     if (subrank+1 < subsize) { /* not the last proc in subcomm */
2673       rend_sub = range[nsubcomm*(subrank+1)];
2674     } else {
2675       rend_sub = mat->rmap->N;
2676     }
2677     mloc_sub = rend_sub - rstart_sub;
2678 
2679     if (M == N) {
2680       ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2681     } else { /* non-square matrix */
2682       ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
2683     }
2684     ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr);
2685     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2686     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2687     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2688   } else {
2689     C = *matredundant;
2690   }
2691 
2692   /* insert local matrix entries */
2693   rptr = sbuf_j;
2694   cols = sbuf_j + rend-rstart + 1;
2695   vals = sbuf_a;
2696   for (i=0; i<rend-rstart; i++) {
2697     row   = i + rstart;
2698     ncols = rptr[i+1] - rptr[i];
2699     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2700     vals += ncols;
2701     cols += ncols;
2702   }
2703   /* insert received matrix entries */
2704   for (imdex=0; imdex<nrecvs; imdex++) {
2705     rstart = rowrange[recv_rank[imdex]];
2706     rend   = rowrange[recv_rank[imdex]+1];
2707     /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
2708     rptr   = rbuf_j[imdex];
2709     cols   = rbuf_j[imdex] + rend-rstart + 1;
2710     vals   = rbuf_a[imdex];
2711     for (i=0; i<rend-rstart; i++) {
2712       row   = i + rstart;
2713       ncols = rptr[i+1] - rptr[i];
2714       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2715       vals += ncols;
2716       cols += ncols;
2717     }
2718   }
2719   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2720   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2721 
2722   if (reuse == MAT_INITIAL_MATRIX) {
2723     *matredundant = C;
2724 
2725     /* create a supporting struct and attach it to C for reuse */
2726     ierr = PetscNewLog(C,&redund);CHKERRQ(ierr);
2727     if (subsize == 1) {
2728       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2729       c->redundant = redund;
2730     } else {
2731       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2732       c->redundant = redund;
2733     }
2734 
2735     redund->nzlocal   = nzlocal;
2736     redund->nsends    = nsends;
2737     redund->nrecvs    = nrecvs;
2738     redund->send_rank = send_rank;
2739     redund->recv_rank = recv_rank;
2740     redund->sbuf_nz   = sbuf_nz;
2741     redund->rbuf_nz   = rbuf_nz;
2742     redund->sbuf_j    = sbuf_j;
2743     redund->sbuf_a    = sbuf_a;
2744     redund->rbuf_j    = rbuf_j;
2745     redund->rbuf_a    = rbuf_a;
2746     redund->psubcomm  = NULL;
2747 
2748     redund->Destroy = C->ops->destroy;
2749     C->ops->destroy = MatDestroy_MatRedundant;
2750   }
2751   PetscFunctionReturn(0);
2752 }
2753 
2754 #undef __FUNCT__
2755 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2756 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2757 {
2758   PetscErrorCode ierr;
2759   MPI_Comm       comm;
2760   PetscMPIInt    size,subsize;
2761   PetscInt       mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N;
2762   Mat_Redundant  *redund=NULL;
2763   PetscSubcomm   psubcomm=NULL;
2764   MPI_Comm       subcomm_in=subcomm;
2765   Mat            *matseq;
2766   IS             isrow,iscol;
2767 
2768   PetscFunctionBegin;
2769   if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */
2770     if (reuse ==  MAT_INITIAL_MATRIX) {
2771       /* create psubcomm, then get subcomm */
2772       ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2773       ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2774       if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must be between 1 and %D",size);
2775 
2776       ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
2777       ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
2778       ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
2779       ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
2780       subcomm = psubcomm->comm;
2781     } else { /* retrieve psubcomm and subcomm */
2782       ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr);
2783       ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2784       if (subsize == 1) {
2785         Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2786         redund = c->redundant;
2787       } else {
2788         Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2789         redund = c->redundant;
2790       }
2791       psubcomm = redund->psubcomm;
2792     }
2793     if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2794       ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr);
2795       if (reuse ==  MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_MatRedundant() */
2796         ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
2797         if (subsize == 1) {
2798           Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2799           c->redundant->psubcomm = psubcomm;
2800         } else {
2801           Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2802           c->redundant->psubcomm = psubcomm;
2803         }
2804       }
2805       PetscFunctionReturn(0);
2806     }
2807   }
2808 
2809   /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */
2810   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2811   if (reuse == MAT_INITIAL_MATRIX) {
2812     /* create a local sequential matrix matseq[0] */
2813     mloc_sub = PETSC_DECIDE;
2814     ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr);
2815     ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
2816     rstart = rend - mloc_sub;
2817     ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
2818     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
2819   } else { /* reuse == MAT_REUSE_MATRIX */
2820     if (subsize == 1) {
2821       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2822       redund = c->redundant;
2823     } else {
2824       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2825       redund = c->redundant;
2826     }
2827 
2828     isrow  = redund->isrow;
2829     iscol  = redund->iscol;
2830     matseq = redund->matseq;
2831   }
2832   ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
2833   ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);
2834 
2835   if (reuse == MAT_INITIAL_MATRIX) {
2836     /* create a supporting struct and attach it to C for reuse */
2837     ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr);
2838     if (subsize == 1) {
2839       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2840       c->redundant = redund;
2841     } else {
2842       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2843       c->redundant = redund;
2844     }
2845     redund->isrow    = isrow;
2846     redund->iscol    = iscol;
2847     redund->matseq   = matseq;
2848     redund->psubcomm = psubcomm;
2849     redund->Destroy               = (*matredundant)->ops->destroy;
2850     (*matredundant)->ops->destroy = MatDestroy_MatRedundant;
2851   }
2852   PetscFunctionReturn(0);
2853 }
2854 
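/*
   A minimal caller-side sketch (not part of the library source; kept out of the
   build with #if 0).  It assumes the public MatGetRedundantMatrix() wrapper takes
   the same argument list as MatGetRedundantMatrix_MPIAIJ() above, and that A is an
   assembled MATMPIAIJ matrix.  Passing MPI_COMM_NULL lets the routine create the
   subcommunicators itself; each of the nsubcomm subcommunicators then holds a full
   copy of A, which is the layout used by redundant preconditioners.
*/
#if 0
static PetscErrorCode ExampleRedundantCopy(Mat A,PetscInt nsubcomm)
{
  Mat            Ared;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* first call: build the redundant matrix and the reusable communication buffers */
  ierr = MatGetRedundantMatrix(A,nsubcomm,MPI_COMM_NULL,MAT_INITIAL_MATRIX,&Ared);CHKERRQ(ierr);
  /* ... update the numerical values of A without changing its nonzero pattern ... */
  /* later calls: reuse the buffers attached to Ared, only matrix values are shipped */
  ierr = MatGetRedundantMatrix(A,nsubcomm,MPI_COMM_NULL,MAT_REUSE_MATRIX,&Ared);CHKERRQ(ierr);
  ierr = MatDestroy(&Ared);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
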
2855 #undef __FUNCT__
2856 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2857 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2858 {
2859   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2860   PetscErrorCode ierr;
2861   PetscInt       i,*idxb = 0;
2862   PetscScalar    *va,*vb;
2863   Vec            vtmp;
2864 
2865   PetscFunctionBegin;
2866   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2867   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2868   if (idx) {
2869     for (i=0; i<A->rmap->n; i++) {
2870       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2871     }
2872   }
2873 
2874   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2875   if (idx) {
2876     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2877   }
2878   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2879   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2880 
2881   for (i=0; i<A->rmap->n; i++) {
2882     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2883       va[i] = vb[i];
2884       if (idx) idx[i] = a->garray[idxb[i]];
2885     }
2886   }
2887 
2888   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2889   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2890   ierr = PetscFree(idxb);CHKERRQ(ierr);
2891   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2892   PetscFunctionReturn(0);
2893 }
2894 
2895 #undef __FUNCT__
2896 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2897 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2898 {
2899   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2900   PetscErrorCode ierr;
2901   PetscInt       i,*idxb = 0;
2902   PetscScalar    *va,*vb;
2903   Vec            vtmp;
2904 
2905   PetscFunctionBegin;
2906   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2907   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2908   if (idx) {
2909     for (i=0; i<A->rmap->n; i++) {
2910       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2911     }
2912   }
2913 
2914   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2915   if (idx) {
2916     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2917   }
2918   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2919   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2920 
2921   for (i=0; i<A->rmap->n; i++) {
2922     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2923       va[i] = vb[i];
2924       if (idx) idx[i] = a->garray[idxb[i]];
2925     }
2926   }
2927 
2928   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2929   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2930   ierr = PetscFree(idxb);CHKERRQ(ierr);
2931   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2932   PetscFunctionReturn(0);
2933 }
2934 
2935 #undef __FUNCT__
2936 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2937 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2938 {
2939   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2940   PetscInt       n      = A->rmap->n;
2941   PetscInt       cstart = A->cmap->rstart;
2942   PetscInt       *cmap  = mat->garray;
2943   PetscInt       *diagIdx, *offdiagIdx;
2944   Vec            diagV, offdiagV;
2945   PetscScalar    *a, *diagA, *offdiagA;
2946   PetscInt       r;
2947   PetscErrorCode ierr;
2948 
2949   PetscFunctionBegin;
2950   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2951   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2952   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2953   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2954   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2955   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2956   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2957   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2958   for (r = 0; r < n; ++r) {
2959     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2960       a[r]   = diagA[r];
2961       idx[r] = cstart + diagIdx[r];
2962     } else {
2963       a[r]   = offdiagA[r];
2964       idx[r] = cmap[offdiagIdx[r]];
2965     }
2966   }
2967   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2968   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2969   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2970   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2971   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2972   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2973   PetscFunctionReturn(0);
2974 }
2975 
2976 #undef __FUNCT__
2977 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2978 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2979 {
2980   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2981   PetscInt       n      = A->rmap->n;
2982   PetscInt       cstart = A->cmap->rstart;
2983   PetscInt       *cmap  = mat->garray;
2984   PetscInt       *diagIdx, *offdiagIdx;
2985   Vec            diagV, offdiagV;
2986   PetscScalar    *a, *diagA, *offdiagA;
2987   PetscInt       r;
2988   PetscErrorCode ierr;
2989 
2990   PetscFunctionBegin;
2991   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2992   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2993   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2994   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2995   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2996   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2997   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2998   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2999   for (r = 0; r < n; ++r) {
3000     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3001       a[r]   = diagA[r];
3002       idx[r] = cstart + diagIdx[r];
3003     } else {
3004       a[r]   = offdiagA[r];
3005       idx[r] = cmap[offdiagIdx[r]];
3006     }
3007   }
3008   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3009   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3010   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3011   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3012   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3013   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3014   PetscFunctionReturn(0);
3015 }
3016 
3017 #undef __FUNCT__
3018 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3019 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3020 {
3021   PetscErrorCode ierr;
3022   Mat            *dummy;
3023 
3024   PetscFunctionBegin;
3025   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3026   *newmat = *dummy;
3027   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3028   PetscFunctionReturn(0);
3029 }
3030 
3031 #undef __FUNCT__
3032 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3033 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3034 {
3035   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3036   PetscErrorCode ierr;
3037 
3038   PetscFunctionBegin;
3039   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3040   PetscFunctionReturn(0);
3041 }
3042 
3043 #undef __FUNCT__
3044 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3045 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3046 {
3047   PetscErrorCode ierr;
3048   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3049 
3050   PetscFunctionBegin;
3051   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3052   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3053   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3054   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3055   PetscFunctionReturn(0);
3056 }
3057 
3058 /* -------------------------------------------------------------------*/
3059 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3060                                        MatGetRow_MPIAIJ,
3061                                        MatRestoreRow_MPIAIJ,
3062                                        MatMult_MPIAIJ,
3063                                 /* 4*/ MatMultAdd_MPIAIJ,
3064                                        MatMultTranspose_MPIAIJ,
3065                                        MatMultTransposeAdd_MPIAIJ,
3066 #if defined(PETSC_HAVE_PBGL)
3067                                        MatSolve_MPIAIJ,
3068 #else
3069                                        0,
3070 #endif
3071                                        0,
3072                                        0,
3073                                 /*10*/ 0,
3074                                        0,
3075                                        0,
3076                                        MatSOR_MPIAIJ,
3077                                        MatTranspose_MPIAIJ,
3078                                 /*15*/ MatGetInfo_MPIAIJ,
3079                                        MatEqual_MPIAIJ,
3080                                        MatGetDiagonal_MPIAIJ,
3081                                        MatDiagonalScale_MPIAIJ,
3082                                        MatNorm_MPIAIJ,
3083                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3084                                        MatAssemblyEnd_MPIAIJ,
3085                                        MatSetOption_MPIAIJ,
3086                                        MatZeroEntries_MPIAIJ,
3087                                 /*24*/ MatZeroRows_MPIAIJ,
3088                                        0,
3089 #if defined(PETSC_HAVE_PBGL)
3090                                        0,
3091 #else
3092                                        0,
3093 #endif
3094                                        0,
3095                                        0,
3096                                 /*29*/ MatSetUp_MPIAIJ,
3097 #if defined(PETSC_HAVE_PBGL)
3098                                        0,
3099 #else
3100                                        0,
3101 #endif
3102                                        0,
3103                                        0,
3104                                        0,
3105                                 /*34*/ MatDuplicate_MPIAIJ,
3106                                        0,
3107                                        0,
3108                                        0,
3109                                        0,
3110                                 /*39*/ MatAXPY_MPIAIJ,
3111                                        MatGetSubMatrices_MPIAIJ,
3112                                        MatIncreaseOverlap_MPIAIJ,
3113                                        MatGetValues_MPIAIJ,
3114                                        MatCopy_MPIAIJ,
3115                                 /*44*/ MatGetRowMax_MPIAIJ,
3116                                        MatScale_MPIAIJ,
3117                                        0,
3118                                        0,
3119                                        MatZeroRowsColumns_MPIAIJ,
3120                                 /*49*/ MatSetRandom_MPIAIJ,
3121                                        0,
3122                                        0,
3123                                        0,
3124                                        0,
3125                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
3126                                        0,
3127                                        MatSetUnfactored_MPIAIJ,
3128                                        MatPermute_MPIAIJ,
3129                                        0,
3130                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3131                                        MatDestroy_MPIAIJ,
3132                                        MatView_MPIAIJ,
3133                                        0,
3134                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3135                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3136                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3137                                        0,
3138                                        0,
3139                                        0,
3140                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3141                                        MatGetRowMinAbs_MPIAIJ,
3142                                        0,
3143                                        MatSetColoring_MPIAIJ,
3144                                        0,
3145                                        MatSetValuesAdifor_MPIAIJ,
3146                                 /*75*/ MatFDColoringApply_AIJ,
3147                                        0,
3148                                        0,
3149                                        0,
3150                                        MatFindZeroDiagonals_MPIAIJ,
3151                                 /*80*/ 0,
3152                                        0,
3153                                        0,
3154                                 /*83*/ MatLoad_MPIAIJ,
3155                                        0,
3156                                        0,
3157                                        0,
3158                                        0,
3159                                        0,
3160                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3161                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3162                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3163                                        MatPtAP_MPIAIJ_MPIAIJ,
3164                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3165                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3166                                        0,
3167                                        0,
3168                                        0,
3169                                        0,
3170                                 /*99*/ 0,
3171                                        0,
3172                                        0,
3173                                        MatConjugate_MPIAIJ,
3174                                        0,
3175                                 /*104*/MatSetValuesRow_MPIAIJ,
3176                                        MatRealPart_MPIAIJ,
3177                                        MatImaginaryPart_MPIAIJ,
3178                                        0,
3179                                        0,
3180                                 /*109*/0,
3181                                        MatGetRedundantMatrix_MPIAIJ,
3182                                        MatGetRowMin_MPIAIJ,
3183                                        0,
3184                                        0,
3185                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3186                                        0,
3187                                        0,
3188                                        0,
3189                                        0,
3190                                 /*119*/0,
3191                                        0,
3192                                        0,
3193                                        0,
3194                                        MatGetMultiProcBlock_MPIAIJ,
3195                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3196                                        MatGetColumnNorms_MPIAIJ,
3197                                        MatInvertBlockDiagonal_MPIAIJ,
3198                                        0,
3199                                        MatGetSubMatricesParallel_MPIAIJ,
3200                                 /*129*/0,
3201                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3202                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3203                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3204                                        0,
3205                                 /*134*/0,
3206                                        0,
3207                                        0,
3208                                        0,
3209                                        0,
3210                                 /*139*/0,
3211                                        0,
3212                                        0,
3213                                        MatFDColoringSetUp_MPIXAIJ
3214 };
3215 
3216 /* ----------------------------------------------------------------------------------------*/
3217 
3218 #undef __FUNCT__
3219 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3220 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3221 {
3222   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3223   PetscErrorCode ierr;
3224 
3225   PetscFunctionBegin;
3226   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3227   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3228   PetscFunctionReturn(0);
3229 }
3230 
3231 #undef __FUNCT__
3232 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3233 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3234 {
3235   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3236   PetscErrorCode ierr;
3237 
3238   PetscFunctionBegin;
3239   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3240   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3241   PetscFunctionReturn(0);
3242 }
3243 
3244 #undef __FUNCT__
3245 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3246 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3247 {
3248   Mat_MPIAIJ     *b;
3249   PetscErrorCode ierr;
3250 
3251   PetscFunctionBegin;
3252   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3253   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3254   b = (Mat_MPIAIJ*)B->data;
3255 
3256   if (!B->preallocated) {
3257     /* Explicitly create 2 MATSEQAIJ matrices. */
3258     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3259     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3260     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
3261     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3262     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
3263     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3264     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3265     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
3266     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3267     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
3268   }
3269 
3270   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3271   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3272   B->preallocated = PETSC_TRUE;
3273   PetscFunctionReturn(0);
3274 }
3275 
3276 #undef __FUNCT__
3277 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3278 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3279 {
3280   Mat            mat;
3281   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3282   PetscErrorCode ierr;
3283 
3284   PetscFunctionBegin;
3285   *newmat = 0;
3286   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3287   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3288   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
3289   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3290   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3291   a       = (Mat_MPIAIJ*)mat->data;
3292 
3293   mat->factortype   = matin->factortype;
3294   mat->assembled    = PETSC_TRUE;
3295   mat->insertmode   = NOT_SET_VALUES;
3296   mat->preallocated = PETSC_TRUE;
3297 
3298   a->size         = oldmat->size;
3299   a->rank         = oldmat->rank;
3300   a->donotstash   = oldmat->donotstash;
3301   a->roworiented  = oldmat->roworiented;
3302   a->rowindices   = 0;
3303   a->rowvalues    = 0;
3304   a->getrowactive = PETSC_FALSE;
3305 
3306   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3307   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3308 
3309   if (oldmat->colmap) {
3310 #if defined(PETSC_USE_CTABLE)
3311     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3312 #else
3313     ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr);
3314     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3315     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3316 #endif
3317   } else a->colmap = 0;
3318   if (oldmat->garray) {
3319     PetscInt len;
3320     len  = oldmat->B->cmap->n;
3321     ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr);
3322     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3323     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3324   } else a->garray = 0;
3325 
3326   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3327   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3328   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3329   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3330   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3331   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3332   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3333   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3334   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3335   *newmat = mat;
3336   PetscFunctionReturn(0);
3337 }
3338 
3339 
3340 
3341 #undef __FUNCT__
3342 #define __FUNCT__ "MatLoad_MPIAIJ"
3343 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3344 {
3345   PetscScalar    *vals,*svals;
3346   MPI_Comm       comm;
3347   PetscErrorCode ierr;
3348   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3349   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3350   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3351   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3352   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3353   int            fd;
3354   PetscInt       bs = 1;
3355 
3356   PetscFunctionBegin;
3357   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3358   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3359   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3360   if (!rank) {
3361     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3362     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3363     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3364   }
3365 
3366   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr);
3367   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3368   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3369 
3370   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3371 
3372   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3373   M    = header[1]; N = header[2];
3374   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3375   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3376   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3377 
3378   /* If global sizes are set, check if they are consistent with that given in the file */
3379   if (sizesset) {
3380     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3381   }
3382   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3383   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3384 
3385   /* determine ownership of all (block) rows */
3386   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3387   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3388   else m = newMat->rmap->n; /* Set by user */
3389 
3390   ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
3391   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3392 
3393   /* First process needs enough room for process with most rows */
3394   if (!rank) {
3395     mmax = rowners[1];
3396     for (i=2; i<=size; i++) {
3397       mmax = PetscMax(mmax, rowners[i]);
3398     }
3399   } else mmax = -1;             /* unused, but compilers complain */
3400 
3401   rowners[0] = 0;
3402   for (i=2; i<=size; i++) {
3403     rowners[i] += rowners[i-1];
3404   }
3405   rstart = rowners[rank];
3406   rend   = rowners[rank+1];
3407 
3408   /* distribute row lengths to all processors */
3409   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
3410   if (!rank) {
3411     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3412     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3413     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3414     for (j=0; j<m; j++) {
3415       procsnz[0] += ourlens[j];
3416     }
3417     for (i=1; i<size; i++) {
3418       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3419       /* calculate the number of nonzeros on each processor */
3420       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3421         procsnz[i] += rowlengths[j];
3422       }
3423       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3424     }
3425     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3426   } else {
3427     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3428   }
3429 
3430   if (!rank) {
3431     /* determine max buffer needed and allocate it */
3432     maxnz = 0;
3433     for (i=0; i<size; i++) {
3434       maxnz = PetscMax(maxnz,procsnz[i]);
3435     }
3436     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3437 
3438     /* read in my part of the matrix column indices  */
3439     nz   = procsnz[0];
3440     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3441     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3442 
3443     /* read in everyone else's and ship it off */
3444     for (i=1; i<size; i++) {
3445       nz   = procsnz[i];
3446       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3447       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3448     }
3449     ierr = PetscFree(cols);CHKERRQ(ierr);
3450   } else {
3451     /* determine buffer space needed for message */
3452     nz = 0;
3453     for (i=0; i<m; i++) {
3454       nz += ourlens[i];
3455     }
3456     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3457 
3458     /* receive message of column indices*/
3459     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3460   }
3461 
3462   /* determine column ownership if matrix is not square */
3463   if (N != M) {
3464     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3465     else n = newMat->cmap->n;
3466     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3467     cstart = cend - n;
3468   } else {
3469     cstart = rstart;
3470     cend   = rend;
3471     n      = cend - cstart;
3472   }
3473 
3474   /* loop over local rows, determining number of off diagonal entries */
3475   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3476   jj   = 0;
3477   for (i=0; i<m; i++) {
3478     for (j=0; j<ourlens[i]; j++) {
3479       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3480       jj++;
3481     }
3482   }
3483 
3484   for (i=0; i<m; i++) {
3485     ourlens[i] -= offlens[i];
3486   }
3487   if (!sizesset) {
3488     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3489   }
3490 
3491   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3492 
3493   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3494 
3495   for (i=0; i<m; i++) {
3496     ourlens[i] += offlens[i];
3497   }
3498 
3499   if (!rank) {
3500     ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr);
3501 
3502     /* read in my part of the matrix numerical values  */
3503     nz   = procsnz[0];
3504     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3505 
3506     /* insert into matrix */
3507     jj      = rstart;
3508     smycols = mycols;
3509     svals   = vals;
3510     for (i=0; i<m; i++) {
3511       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3512       smycols += ourlens[i];
3513       svals   += ourlens[i];
3514       jj++;
3515     }
3516 
3517     /* read in other processors and ship out */
3518     for (i=1; i<size; i++) {
3519       nz   = procsnz[i];
3520       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3521       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3522     }
3523     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3524   } else {
3525     /* receive numeric values */
3526     ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);
3527 
3528     /* receive message of values*/
3529     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3530 
3531     /* insert into matrix */
3532     jj      = rstart;
3533     smycols = mycols;
3534     svals   = vals;
3535     for (i=0; i<m; i++) {
3536       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3537       smycols += ourlens[i];
3538       svals   += ourlens[i];
3539       jj++;
3540     }
3541   }
3542   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3543   ierr = PetscFree(vals);CHKERRQ(ierr);
3544   ierr = PetscFree(mycols);CHKERRQ(ierr);
3545   ierr = PetscFree(rowners);CHKERRQ(ierr);
3546   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3547   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3548   PetscFunctionReturn(0);
3549 }
3550 
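/*
   A minimal caller-side sketch (not part of the library source; kept out of the
   build with #if 0).  Loading a matrix previously written with MatView() on a
   binary viewer reaches MatLoad_MPIAIJ() above when the communicator has more
   than one process; the file name used here is only a placeholder.
*/
#if 0
static PetscErrorCode ExampleLoadAIJ(MPI_Comm comm,Mat *A)
{
  PetscViewer    viewer;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATAIJ);CHKERRQ(ierr);    /* becomes MATMPIAIJ on a parallel communicator */
  ierr = MatSetFromOptions(*A);CHKERRQ(ierr);    /* optional: allow -mat_type to override the type */
  ierr = MatLoad(*A,viewer);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
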
3551 #undef __FUNCT__
3552 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3553 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3554 {
3555   PetscErrorCode ierr;
3556   IS             iscol_local;
3557   PetscInt       csize;
3558 
3559   PetscFunctionBegin;
3560   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3561   if (call == MAT_REUSE_MATRIX) {
3562     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3563     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3564   } else {
3565     PetscInt cbs;
3566     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3567     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3568     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3569   }
3570   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3571   if (call == MAT_INITIAL_MATRIX) {
3572     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3573     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3574   }
3575   PetscFunctionReturn(0);
3576 }
3577 
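/*
   A minimal caller-side sketch (not part of the library source; kept out of the
   build with #if 0).  It extracts every other locally owned row while keeping all
   columns (each process requests its own column range, so the column layout of the
   result matches the original).  The call goes through the public MatGetSubMatrix()
   interface, which dispatches to MatGetSubMatrix_MPIAIJ() above for MATMPIAIJ.
*/
#if 0
static PetscErrorCode ExampleExtractSubMatrix(Mat A,Mat *Asub)
{
  PetscInt       rstart,rend,cstart,cend;
  IS             isrow,iscol;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetOwnershipRangeColumn(A,&cstart,&cend);CHKERRQ(ierr);
  /* every other locally owned row */
  ierr = ISCreateStride(PetscObjectComm((PetscObject)A),(rend-rstart+1)/2,rstart,2,&isrow);CHKERRQ(ierr);
  /* all columns, with each process listing the ones it should own in the result */
  ierr = ISCreateStride(PetscObjectComm((PetscObject)A),cend-cstart,cstart,1,&iscol);CHKERRQ(ierr);
  ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,Asub);CHKERRQ(ierr);
  ierr = ISDestroy(&isrow);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
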
3578 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3579 #undef __FUNCT__
3580 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3581 /*
3582     Not great since it makes two copies of the submatrix: first a local SeqAIJ
3583   copy, and then the end result obtained by concatenating the local matrices.
3584   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3585 
3586   Note: This requires a sequential iscol with all indices.
3587 */
3588 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3589 {
3590   PetscErrorCode ierr;
3591   PetscMPIInt    rank,size;
3592   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3593   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3594   PetscBool      allcolumns, colflag;
3595   Mat            M,Mreuse;
3596   MatScalar      *vwork,*aa;
3597   MPI_Comm       comm;
3598   Mat_SeqAIJ     *aij;
3599 
3600   PetscFunctionBegin;
3601   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3602   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3603   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3604 
3605   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3606   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3607   if (colflag && ncol == mat->cmap->N) {
3608     allcolumns = PETSC_TRUE;
3609   } else {
3610     allcolumns = PETSC_FALSE;
3611   }
3612   if (call ==  MAT_REUSE_MATRIX) {
3613     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3614     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3615     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3616   } else {
3617     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3618   }
3619 
3620   /*
3621       m - number of local rows
3622       n - number of columns (same on all processors)
3623       rstart - first row in new global matrix generated
3624   */
3625   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3626   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3627   if (call == MAT_INITIAL_MATRIX) {
3628     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3629     ii  = aij->i;
3630     jj  = aij->j;
3631 
3632     /*
3633         Determine the number of non-zeros in the diagonal and off-diagonal
3634         portions of the matrix in order to do correct preallocation
3635     */
3636 
3637     /* first get start and end of "diagonal" columns */
3638     if (csize == PETSC_DECIDE) {
3639       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3640       if (mglobal == n) { /* square matrix */
3641         nlocal = m;
3642       } else {
3643         nlocal = n/size + ((n % size) > rank);
3644       }
3645     } else {
3646       nlocal = csize;
3647     }
3648     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3649     rstart = rend - nlocal;
3650     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3651 
3652     /* next, compute all the lengths */
3653     ierr  = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr);
3654     olens = dlens + m;
3655     for (i=0; i<m; i++) {
3656       jend = ii[i+1] - ii[i];
3657       olen = 0;
3658       dlen = 0;
3659       for (j=0; j<jend; j++) {
3660         if (*jj < rstart || *jj >= rend) olen++;
3661         else dlen++;
3662         jj++;
3663       }
3664       olens[i] = olen;
3665       dlens[i] = dlen;
3666     }
3667     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3668     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3669     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3670     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3671     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3672     ierr = PetscFree(dlens);CHKERRQ(ierr);
3673   } else {
3674     PetscInt ml,nl;
3675 
3676     M    = *newmat;
3677     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3678     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3679     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3680     /*
3681          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3682        rather than the slower MatSetValues().
3683     */
3684     M->was_assembled = PETSC_TRUE;
3685     M->assembled     = PETSC_FALSE;
3686   }
3687   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3688   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3689   ii   = aij->i;
3690   jj   = aij->j;
3691   aa   = aij->a;
3692   for (i=0; i<m; i++) {
3693     row   = rstart + i;
3694     nz    = ii[i+1] - ii[i];
3695     cwork = jj;     jj += nz;
3696     vwork = aa;     aa += nz;
3697     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3698   }
3699 
3700   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3701   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3702   *newmat = M;
3703 
3704   /* save submatrix used in processor for next request */
3705   if (call ==  MAT_INITIAL_MATRIX) {
3706     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3707     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3708   }
3709   PetscFunctionReturn(0);
3710 }
3711 
3712 #undef __FUNCT__
3713 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3714 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3715 {
3716   PetscInt       m,cstart, cend,j,nnz,i,d;
3717   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3718   const PetscInt *JJ;
3719   PetscScalar    *values;
3720   PetscErrorCode ierr;
3721 
3722   PetscFunctionBegin;
3723   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3724 
3725   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3726   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3727   m      = B->rmap->n;
3728   cstart = B->cmap->rstart;
3729   cend   = B->cmap->rend;
3730   rstart = B->rmap->rstart;
3731 
3732   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3733 
3734 #if defined(PETSC_USE_DEBUG)
3735   for (i=0; i<m; i++) {
3736     nnz = Ii[i+1]- Ii[i];
3737     JJ  = J + Ii[i];
3738     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3739     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3740     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3741   }
3742 #endif
3743 
3744   for (i=0; i<m; i++) {
3745     nnz     = Ii[i+1]- Ii[i];
3746     JJ      = J + Ii[i];
3747     nnz_max = PetscMax(nnz_max,nnz);
3748     d       = 0;
3749     for (j=0; j<nnz; j++) {
3750       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3751     }
3752     d_nnz[i] = d;
3753     o_nnz[i] = nnz - d;
3754   }
3755   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3756   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3757 
3758   if (v) values = (PetscScalar*)v;
3759   else {
3760     ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
3761   }
3762 
3763   for (i=0; i<m; i++) {
3764     ii   = i + rstart;
3765     nnz  = Ii[i+1]- Ii[i];
3766     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3767   }
3768   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3769   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3770 
3771   if (!v) {
3772     ierr = PetscFree(values);CHKERRQ(ierr);
3773   }
3774   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3775   PetscFunctionReturn(0);
3776 }
3777 
3778 #undef __FUNCT__
3779 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3780 /*@
3781    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3782    (the default parallel PETSc format).
3783 
3784    Collective on MPI_Comm
3785 
3786    Input Parameters:
3787 +  B - the matrix
3788 .  i - the indices into j for the start of each local row (starts with zero)
3789 .  j - the column indices for each local row (starts with zero)
3790 -  v - optional values in the matrix
3791 
3792    Level: developer
3793 
3794    Notes:
3795        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3796      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3797      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3798 
3799        The i and j indices are 0-based, and the entries of i are offsets into the local j array.
3800 
3801        The format which is used for the sparse matrix input is equivalent to a
3802     row-major ordering, i.e., for the following matrix, the input data expected is
3803     as shown:
3804 
3805         1 0 0
3806         2 0 3     P0
3807        -------
3808         4 5 6     P1
3809 
3810      Process0 [P0]: rows_owned=[0,1]
3811         i =  {0,1,3}  [size = nrow+1  = 2+1]
3812         j =  {0,0,2}  [size = nz = 3]
3813         v =  {1,2,3}  [size = nz = 3]
3814 
3815      Process1 [P1]: rows_owned=[2]
3816         i =  {0,3}    [size = nrow+1  = 1+1]
3817         j =  {0,1,2}  [size = nz = 3]
3818         v =  {4,5,6}  [size = nz = 3]
3819 
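     A sketch of the corresponding calls, assuming B was already created with the
     matching local sizes (for example with MatCreateAIJ(), or MatCreate()/MatSetSizes()/
     MatSetType()) and that rank was obtained with MPI_Comm_rank() on B's communicator.
     The routine preallocates B and also inserts the given values, so B is assembled on return:

.vb
      PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2};
      PetscScalar v0[] = {1,2,3};
      PetscInt    i1[] = {0,3},   j1[] = {0,1,2};
      PetscScalar v1[] = {4,5,6};

      if (!rank) {      /* P0 owns rows 0 and 1 */
        ierr = MatMPIAIJSetPreallocationCSR(B,i0,j0,v0);CHKERRQ(ierr);
      } else {          /* P1 owns row 2 */
        ierr = MatMPIAIJSetPreallocationCSR(B,i1,j1,v1);CHKERRQ(ierr);
      }
.ve
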
3820 .keywords: matrix, aij, compressed row, sparse, parallel
3821 
3822 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3823           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3824 @*/
3825 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3826 {
3827   PetscErrorCode ierr;
3828 
3829   PetscFunctionBegin;
3830   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3831   PetscFunctionReturn(0);
3832 }
3833 
3834 #undef __FUNCT__
3835 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3836 /*@C
3837    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3838    (the default parallel PETSc format).  For good matrix assembly performance
3839    the user should preallocate the matrix storage by setting the parameters
3840    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3841    performance can be increased by more than a factor of 50.
3842 
3843    Collective on MPI_Comm
3844 
3845    Input Parameters:
3846 +  A - the matrix
3847 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3848            (same value is used for all local rows)
3849 .  d_nnz - array containing the number of nonzeros in the various rows of the
3850            DIAGONAL portion of the local submatrix (possibly different for each row)
3851            or NULL, if d_nz is used to specify the nonzero structure.
3852            The size of this array is equal to the number of local rows, i.e 'm'.
3853            For matrices that will be factored, you must leave room for (and set)
3854            the diagonal entry even if it is zero.
3855 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3856            submatrix (same value is used for all local rows).
3857 -  o_nnz - array containing the number of nonzeros in the various rows of the
3858            OFF-DIAGONAL portion of the local submatrix (possibly different for
3859            each row) or NULL, if o_nz is used to specify the nonzero
3860            structure. The size of this array is equal to the number
3861            of local rows, i.e. 'm'.
3862 
3863    If the *_nnz parameter is given then the *_nz parameter is ignored
3864 
3865    The AIJ format (also called the Yale sparse matrix format or
3866    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3867    storage.  The stored row and column indices begin with zero.
3868    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3869 
3870    The parallel matrix is partitioned such that the first m0 rows belong to
3871    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3872    to process 2, etc., where m0,m1,m2,... are the input parameter 'm' on each process.
3873 
3874    The DIAGONAL portion of the local submatrix of a processor can be defined
3875    as the submatrix obtained by extracting the part corresponding to
3876    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3877    first row that belongs to the processor, r2 is the last row belonging to
3878    this processor, and c1-c2 is the range of indices of the local part of a
3879    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
3880    common case of a square matrix, the row and column ranges are the same and
3881    the DIAGONAL part is also square. The remaining m x (N-n) portion of the local
3882    submatrix constitutes the OFF-DIAGONAL portion.
3883 
3884    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3885 
3886    You can call MatGetInfo() to get information on how effective the preallocation was;
3887    for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3888    You can also run with the option -info and look for messages with the string
3889    malloc in them to see if additional memory allocation was needed.
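
   For example, a quick check of the local preallocation statistics after the matrix B has
   been assembled might look like this (a sketch):

.vb
     MatInfo info;
     ierr = MatGetInfo(B,MAT_LOCAL,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_SELF,"mallocs %g nz_allocated %g nz_used %g nz_unneeded %g\n",
                        info.mallocs,info.nz_allocated,info.nz_used,info.nz_unneeded);CHKERRQ(ierr);
.ve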
3890 
3891    Example usage:
3892 
3893    Consider the following 8x8 matrix with 34 non-zero values, that is
3894    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3895    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3896    as follows:
3897 
3898 .vb
3899             1  2  0  |  0  3  0  |  0  4
3900     Proc0   0  5  6  |  7  0  0  |  8  0
3901             9  0 10  | 11  0  0  | 12  0
3902     -------------------------------------
3903            13  0 14  | 15 16 17  |  0  0
3904     Proc1   0 18  0  | 19 20 21  |  0  0
3905             0  0  0  | 22 23  0  | 24  0
3906     -------------------------------------
3907     Proc2  25 26 27  |  0  0 28  | 29  0
3908            30  0  0  | 31 32 33  |  0 34
3909 .ve
3910 
3911    This can be represented as a collection of submatrices as:
3912 
3913 .vb
3914       A B C
3915       D E F
3916       G H I
3917 .ve
3918 
3919    Where the submatrices A,B,C are owned by proc0, D,E,F are
3920    owned by proc1, G,H,I are owned by proc2.
3921 
3922    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3923    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3924    The 'M','N' parameters are 8,8, and have the same values on all procs.
3925 
3926    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3927    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3928    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3929    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
3930    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3931    matrix, and [DF] as another SeqAIJ matrix.
3932 
3933    When d_nz, o_nz parameters are specified, d_nz storage elements are
3934    allocated for every row of the local diagonal submatrix, and o_nz
3935    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3936    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3937    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
3938    In this case, the values of d_nz,o_nz are:
3939 .vb
3940      proc0 : d_nz = 2, o_nz = 2
3941      proc1 : d_nz = 3, o_nz = 2
3942      proc2 : d_nz = 1, o_nz = 4
3943 .ve
3944    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3945    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
3946    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3947    34 values.
3948 
3949    When d_nnz, o_nnz parameters are specified, the storage is specified
3950    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3951    In the above case the values for d_nnz,o_nnz are:
3952 .vb
3953      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3954      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3955      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3956 .ve
3957    Here the space allocated is the sum of all the above values, i.e. 34, and
3958    hence the preallocation is perfect.
3959 
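   Putting the d_nnz/o_nnz variant of this example together for proc0, a sketch of the calls
   (the other processes pass their own local sizes and arrays) is:

.vb
     Mat      B;
     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};   /* proc0 values from the table above */

     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,3,3,8,8);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
     /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
.ve
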
3960    Level: intermediate
3961 
3962 .keywords: matrix, aij, compressed row, sparse, parallel
3963 
3964 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3965           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3966 @*/
3967 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3968 {
3969   PetscErrorCode ierr;
3970 
3971   PetscFunctionBegin;
3972   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3973   PetscValidType(B,1);
3974   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3975   PetscFunctionReturn(0);
3976 }
3977 
3978 #undef __FUNCT__
3979 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3980 /*@
3981      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3982          CSR format the local rows.
3983 
3984    Collective on MPI_Comm
3985 
3986    Input Parameters:
3987 +  comm - MPI communicator
3988 .  m - number of local rows (Cannot be PETSC_DECIDE)
3989 .  n - This value should be the same as the local size used in creating the
3990        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3991        calculated if N is given). For square matrices n is almost always m.
3992 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3993 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3994 .   i - row indices
3995 .   j - column indices
3996 -   a - matrix values
3997 
3998    Output Parameter:
3999 .   mat - the matrix
4000 
4001    Level: intermediate
4002 
4003    Notes:
4004        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4005      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4006      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4007 
4008        The i and j indices are 0 based, and the i indices are offsets into the local j (and a) arrays.
4009 
4010        The format used for the sparse matrix input is equivalent to a
4011     row-major ordering, i.e. for the following matrix, the input data expected is
4012     as shown:
4013 
4014         1 0 0
4015         2 0 3     P0
4016        -------
4017         4 5 6     P1
4018 
4019      Process0 [P0]: rows_owned=[0,1]
4020         i =  {0,1,3}  [size = nrow+1  = 2+1]
4021         j =  {0,0,2}  [size = local nz = 3]
4022         a =  {1,2,3}  [size = local nz = 3]
4023 
4024      Process1 [P1]: rows_owned=[2]
4025         i =  {0,3}    [size = nrow+1  = 1+1]
4026         j =  {0,1,2}  [size = local nz = 3]
4027         a =  {4,5,6}  [size = local nz = 3]
4028 
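     A sketch of the corresponding call on P0 (P1 passes its own arrays and uses m = 1):

.vb
     Mat         mat;
     PetscInt    i[] = {0,1,3},j[] = {0,0,2};
     PetscScalar a[] = {1,2,3};

     ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,a,&mat);CHKERRQ(ierr);
.ve
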
4029 .keywords: matrix, aij, compressed row, sparse, parallel
4030 
4031 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4032           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4033 @*/
4034 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4035 {
4036   PetscErrorCode ierr;
4037 
4038   PetscFunctionBegin;
4039   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4040   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4041   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4042   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4043   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4044   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4045   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4046   PetscFunctionReturn(0);
4047 }
4048 
4049 #undef __FUNCT__
4050 #define __FUNCT__ "MatCreateAIJ"
4051 /*@C
4052    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4053    (the default parallel PETSc format).  For good matrix assembly performance
4054    the user should preallocate the matrix storage by setting the parameters
4055    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4056    performance can be increased by more than a factor of 50.
4057 
4058    Collective on MPI_Comm
4059 
4060    Input Parameters:
4061 +  comm - MPI communicator
4062 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4063            This value should be the same as the local size used in creating the
4064            y vector for the matrix-vector product y = Ax.
4065 .  n - This value should be the same as the local size used in creating the
4066        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4067        calculated if N is given). For square matrices n is almost always m.
4068 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4069 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4070 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4071            (same value is used for all local rows)
4072 .  d_nnz - array containing the number of nonzeros in the various rows of the
4073            DIAGONAL portion of the local submatrix (possibly different for each row)
4074            or NULL, if d_nz is used to specify the nonzero structure.
4075            The size of this array is equal to the number of local rows, i.e. 'm'.
4076 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4077            submatrix (same value is used for all local rows).
4078 -  o_nnz - array containing the number of nonzeros in the various rows of the
4079            OFF-DIAGONAL portion of the local submatrix (possibly different for
4080            each row) or NULL, if o_nz is used to specify the nonzero
4081            structure. The size of this array is equal to the number
4082            of local rows, i.e. 'm'.
4083 
4084    Output Parameter:
4085 .  A - the matrix
4086 
4087    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4088    MatXXXXSetPreallocation() paradigm instead of calling this routine directly.
4089    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4090 
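   One possible sequence following that paradigm is sketched below (m,n,M,N and the
   preallocation arguments are as described on this page; the preallocation call that does
   not match the actual matrix type is simply ignored):

.vb
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,d_nz,d_nnz);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
.ve
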
4091    Notes:
4092    If the *_nnz parameter is given then the *_nz parameter is ignored
4093 
4094    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4095    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4096    storage requirements for this matrix.
4097 
4098    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4099    processor then it must be used on all processors that share the object for
4100    that argument.
4101 
4102    The user MUST specify either the local or global matrix dimensions
4103    (possibly both).
4104 
4105    The parallel matrix is partitioned across processors such that the
4106    first m0 rows belong to process 0, the next m1 rows belong to
4107    process 1, the next m2 rows belong to process 2, etc., where
4108    m0,m1,m2,... are the input parameter 'm' on each process, i.e. each processor
4109    stores values corresponding to an [m x N] submatrix.
4110 
4111    The columns are logically partitioned with the first n0 columns belonging
4112    to the 0th partition, the next n1 columns belonging to the next
4113    partition, etc., where n0,n1,n2,... are the input parameter 'n' on each process.
4114 
4115    The DIAGONAL portion of the local submatrix on any given processor
4116    is the submatrix formed by the rows and columns m,n owned by
4117    that processor, i.e. the diagonal submatrix on
4118    process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4119    etc. The remaining portion of the local submatrix [m x (N-n)]
4120    constitutes the OFF-DIAGONAL portion. The example below better
4121    illustrates this concept.
4122 
4123    For a square global matrix we define each processor's diagonal portion
4124    to be its local rows and the corresponding columns (a square submatrix);
4125    each processor's off-diagonal portion encompasses the remainder of the
4126    local matrix (a rectangular submatrix).
4127 
4128    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4129 
4130    When calling this routine with a single process communicator, a matrix of
4131    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4132    type of communicator, use the construction mechanism:
4133      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4134 
4135    By default, this format uses inodes (identical nodes) when possible.
4136    We search for consecutive rows with the same nonzero structure, thereby
4137    reusing matrix information to achieve increased efficiency.
4138 
4139    Options Database Keys:
4140 +  -mat_no_inode  - Do not use inodes
4141 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4142 -  -mat_aij_oneindex - Internally use indexing starting at 1
4143         rather than 0.  Note that when calling MatSetValues(),
4144         the user still MUST index entries starting at 0!
4145 
4146 
4147    Example usage:
4148 
4149    Consider the following 8x8 matrix with 34 non-zero values, that is
4150    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4151    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4152    as follows:
4153 
4154 .vb
4155             1  2  0  |  0  3  0  |  0  4
4156     Proc0   0  5  6  |  7  0  0  |  8  0
4157             9  0 10  | 11  0  0  | 12  0
4158     -------------------------------------
4159            13  0 14  | 15 16 17  |  0  0
4160     Proc1   0 18  0  | 19 20 21  |  0  0
4161             0  0  0  | 22 23  0  | 24  0
4162     -------------------------------------
4163     Proc2  25 26 27  |  0  0 28  | 29  0
4164            30  0  0  | 31 32 33  |  0 34
4165 .ve
4166 
4167    This can be represented as a collection of submatrices as:
4168 
4169 .vb
4170       A B C
4171       D E F
4172       G H I
4173 .ve
4174 
4175    Where the submatrices A,B,C are owned by proc0, D,E,F are
4176    owned by proc1, G,H,I are owned by proc2.
4177 
4178    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4179    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4180    The 'M','N' parameters are 8,8, and have the same values on all procs.
4181 
4182    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4183    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4184    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4185    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4186    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4187    matrix, and [DF] as another SeqAIJ matrix.
4188 
4189    When d_nz, o_nz parameters are specified, d_nz storage elements are
4190    allocated for every row of the local diagonal submatrix, and o_nz
4191    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4192    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4193    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
4194    In this case, the values of d_nz,o_nz are:
4195 .vb
4196      proc0 : d_nz = 2, o_nz = 2
4197      proc1 : d_nz = 3, o_nz = 2
4198      proc2 : d_nz = 1, o_nz = 4
4199 .ve
4200    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4201    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4202    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4203    34 values.
4204 
4205    When d_nnz, o_nnz parameters are specified, the storage is specified
4206    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4207    In the above case the values for d_nnz,o_nnz are:
4208 .vb
4209      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4210      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4211      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4212 .ve
4213    Here the space allocated is the sum of all the above values, i.e. 34, and
4214    hence the preallocation is perfect.
4215 
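   In terms of MatCreateAIJ() itself, the corresponding call on proc0 is sketched below
   (each process passes its own m, n, d_nnz, and o_nnz):

.vb
     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
     ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
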
4216    Level: intermediate
4217 
4218 .keywords: matrix, aij, compressed row, sparse, parallel
4219 
4220 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4221           MPIAIJ, MatCreateMPIAIJWithArrays()
4222 @*/
4223 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4224 {
4225   PetscErrorCode ierr;
4226   PetscMPIInt    size;
4227 
4228   PetscFunctionBegin;
4229   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4230   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4231   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4232   if (size > 1) {
4233     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4234     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4235   } else {
4236     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4237     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4238   }
4239   PetscFunctionReturn(0);
4240 }
4241 
4242 #undef __FUNCT__
4243 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4244 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4245 {
4246   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4247 
4248   PetscFunctionBegin;
4249   *Ad     = a->A;
4250   *Ao     = a->B;
4251   *colmap = a->garray;
4252   PetscFunctionReturn(0);
4253 }
4254 
4255 #undef __FUNCT__
4256 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4257 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4258 {
4259   PetscErrorCode ierr;
4260   PetscInt       i;
4261   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4262 
4263   PetscFunctionBegin;
4264   if (coloring->ctype == IS_COLORING_GLOBAL) {
4265     ISColoringValue *allcolors,*colors;
4266     ISColoring      ocoloring;
4267 
4268     /* set coloring for diagonal portion */
4269     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4270 
4271     /* set coloring for off-diagonal portion */
4272     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4273     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4274     for (i=0; i<a->B->cmap->n; i++) {
4275       colors[i] = allcolors[a->garray[i]];
4276     }
4277     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4278     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4279     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4280     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4281   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4282     ISColoringValue *colors;
4283     PetscInt        *larray;
4284     ISColoring      ocoloring;
4285 
4286     /* set coloring for diagonal portion */
4287     ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr);
4288     for (i=0; i<a->A->cmap->n; i++) {
4289       larray[i] = i + A->cmap->rstart;
4290     }
4291     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4292     ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr);
4293     for (i=0; i<a->A->cmap->n; i++) {
4294       colors[i] = coloring->colors[larray[i]];
4295     }
4296     ierr = PetscFree(larray);CHKERRQ(ierr);
4297     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4298     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4299     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4300 
4301     /* set coloring for off-diagonal portion */
4302     ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr);
4303     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4304     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4305     for (i=0; i<a->B->cmap->n; i++) {
4306       colors[i] = coloring->colors[larray[i]];
4307     }
4308     ierr = PetscFree(larray);CHKERRQ(ierr);
4309     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4310     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4311     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4312   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4313   PetscFunctionReturn(0);
4314 }
4315 
4316 #undef __FUNCT__
4317 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4318 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4319 {
4320   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4321   PetscErrorCode ierr;
4322 
4323   PetscFunctionBegin;
4324   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4325   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4326   PetscFunctionReturn(0);
4327 }
4328 
4329 #undef __FUNCT__
4330 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4331 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4332 {
4333   PetscErrorCode ierr;
4334   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4335   PetscInt       *indx;
4336 
4337   PetscFunctionBegin;
4338   /* This routine will ONLY return MPIAIJ type matrix */
4339   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4340   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4341   if (n == PETSC_DECIDE) {
4342     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4343   }
4344   /* Check sum(n) = N */
4345   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4346   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4347 
4348   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4349   rstart -= m;
4350 
4351   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4352   for (i=0; i<m; i++) {
4353     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4354     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4355     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4356   }
4357 
4358   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4359   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4360   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4361   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4362   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4363   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4364   PetscFunctionReturn(0);
4365 }
4366 
4367 #undef __FUNCT__
4368 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4369 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4370 {
4371   PetscErrorCode ierr;
4372   PetscInt       m,N,i,rstart,nnz,Ii;
4373   PetscInt       *indx;
4374   PetscScalar    *values;
4375 
4376   PetscFunctionBegin;
4377   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4378   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4379   for (i=0; i<m; i++) {
4380     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4381     Ii   = i + rstart;
4382     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4383     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4384   }
4385   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4386   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4387   PetscFunctionReturn(0);
4388 }
4389 
4390 #undef __FUNCT__
4391 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4392 /*@
4393       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4394                  matrices from each processor
4395 
4396     Collective on MPI_Comm
4397 
4398    Input Parameters:
4399 +    comm - the communicator the parallel matrix will live on
4400 .    inmat - the input sequential matrix (one per process)
4401 .    n - number of local columns (or PETSC_DECIDE)
4402 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4403 
4404    Output Parameter:
4405 .    outmat - the parallel matrix generated
4406 
4407     Level: advanced
4408 
4409    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4410 
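   A typical use is sketched below (seq is a placeholder for the assembled SeqAIJ matrix held
   by each process):

.vb
     Mat C;
     ierr = MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,seq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
.ve
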
4411 @*/
4412 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4413 {
4414   PetscErrorCode ierr;
4415   PetscMPIInt    size;
4416 
4417   PetscFunctionBegin;
4418   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4419   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4420   if (size == 1) {
4421     if (scall == MAT_INITIAL_MATRIX) {
4422       ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr);
4423     } else {
4424       ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4425     }
4426   } else {
4427     if (scall == MAT_INITIAL_MATRIX) {
4428       ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4429     }
4430     ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4431   }
4432   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4433   PetscFunctionReturn(0);
4434 }
4435 
4436 #undef __FUNCT__
4437 #define __FUNCT__ "MatFileSplit"
4438 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4439 {
4440   PetscErrorCode    ierr;
4441   PetscMPIInt       rank;
4442   PetscInt          m,N,i,rstart,nnz;
4443   size_t            len;
4444   const PetscInt    *indx;
4445   PetscViewer       out;
4446   char              *name;
4447   Mat               B;
4448   const PetscScalar *values;
4449 
4450   PetscFunctionBegin;
4451   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4452   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4453   /* Should this be the type of the diagonal block of A? */
4454   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4455   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4456   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4457   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4458   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4459   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4460   for (i=0; i<m; i++) {
4461     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4462     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4463     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4464   }
4465   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4466   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4467 
4468   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4469   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4470   ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr);
4471   sprintf(name,"%s.%d",outfile,rank);
4472   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4473   ierr = PetscFree(name);CHKERRQ(ierr);
4474   ierr = MatView(B,out);CHKERRQ(ierr);
4475   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4476   ierr = MatDestroy(&B);CHKERRQ(ierr);
4477   PetscFunctionReturn(0);
4478 }
4479 
4480 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4481 #undef __FUNCT__
4482 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4483 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4484 {
4485   PetscErrorCode      ierr;
4486   Mat_Merge_SeqsToMPI *merge;
4487   PetscContainer      container;
4488 
4489   PetscFunctionBegin;
4490   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4491   if (container) {
4492     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4493     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4494     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4495     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4496     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4497     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4498     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4499     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4500     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4501     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4502     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4503     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4504     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4505     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4506     ierr = PetscFree(merge);CHKERRQ(ierr);
4507     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4508   }
4509   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4510   PetscFunctionReturn(0);
4511 }
4512 
4513 #include <../src/mat/utils/freespace.h>
4514 #include <petscbt.h>
4515 
4516 #undef __FUNCT__
4517 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4518 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4519 {
4520   PetscErrorCode      ierr;
4521   MPI_Comm            comm;
4522   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4523   PetscMPIInt         size,rank,taga,*len_s;
4524   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4525   PetscInt            proc,m;
4526   PetscInt            **buf_ri,**buf_rj;
4527   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4528   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4529   MPI_Request         *s_waits,*r_waits;
4530   MPI_Status          *status;
4531   MatScalar           *aa=a->a;
4532   MatScalar           **abuf_r,*ba_i;
4533   Mat_Merge_SeqsToMPI *merge;
4534   PetscContainer      container;
4535 
4536   PetscFunctionBegin;
4537   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4538   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4539 
4540   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4541   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4542 
4543   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4544   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4545 
4546   bi     = merge->bi;
4547   bj     = merge->bj;
4548   buf_ri = merge->buf_ri;
4549   buf_rj = merge->buf_rj;
4550 
4551   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4552   owners = merge->rowmap->range;
4553   len_s  = merge->len_s;
4554 
4555   /* send and recv matrix values */
4556   /*-----------------------------*/
4557   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4558   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4559 
4560   ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr);
4561   for (proc=0,k=0; proc<size; proc++) {
4562     if (!len_s[proc]) continue;
4563     i    = owners[proc];
4564     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4565     k++;
4566   }
4567 
4568   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4569   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4570   ierr = PetscFree(status);CHKERRQ(ierr);
4571 
4572   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4573   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4574 
4575   /* insert mat values of mpimat */
4576   /*----------------------------*/
4577   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4578   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4579 
4580   for (k=0; k<merge->nrecv; k++) {
4581     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4582     nrows       = *(buf_ri_k[k]);
4583     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4584     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4585   }
4586 
4587   /* set values of ba */
4588   m = merge->rowmap->n;
4589   for (i=0; i<m; i++) {
4590     arow = owners[rank] + i;
4591     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4592     bnzi = bi[i+1] - bi[i];
4593     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4594 
4595     /* add local non-zero vals of this proc's seqmat into ba */
4596     anzi   = ai[arow+1] - ai[arow];
4597     aj     = a->j + ai[arow];
4598     aa     = a->a + ai[arow];
4599     nextaj = 0;
4600     for (j=0; nextaj<anzi; j++) {
4601       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4602         ba_i[j] += aa[nextaj++];
4603       }
4604     }
4605 
4606     /* add received vals into ba */
4607     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4608       /* i-th row */
4609       if (i == *nextrow[k]) {
4610         anzi   = *(nextai[k]+1) - *nextai[k];
4611         aj     = buf_rj[k] + *(nextai[k]);
4612         aa     = abuf_r[k] + *(nextai[k]);
4613         nextaj = 0;
4614         for (j=0; nextaj<anzi; j++) {
4615           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4616             ba_i[j] += aa[nextaj++];
4617           }
4618         }
4619         nextrow[k]++; nextai[k]++;
4620       }
4621     }
4622     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4623   }
4624   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4625   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4626 
4627   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4628   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4629   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4630   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4631   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4632   PetscFunctionReturn(0);
4633 }
4634 
4635 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4636 
4637 #undef __FUNCT__
4638 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4639 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4640 {
4641   PetscErrorCode      ierr;
4642   Mat                 B_mpi;
4643   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4644   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4645   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4646   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4647   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4648   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4649   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4650   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4651   MPI_Status          *status;
4652   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4653   PetscBT             lnkbt;
4654   Mat_Merge_SeqsToMPI *merge;
4655   PetscContainer      container;
4656 
4657   PetscFunctionBegin;
4658   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4659 
4660   /* make sure it is a PETSc comm */
4661   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4662   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4663   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4664 
4665   ierr = PetscNew(&merge);CHKERRQ(ierr);
4666   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4667 
4668   /* determine row ownership */
4669   /*---------------------------------------------------------*/
4670   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4671   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4672   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4673   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4674   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4675   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4676   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4677 
4678   m      = merge->rowmap->n;
4679   owners = merge->rowmap->range;
4680 
4681   /* determine the number of messages to send, their lengths */
4682   /*---------------------------------------------------------*/
4683   len_s = merge->len_s;
4684 
4685   len          = 0; /* length of buf_si[] */
4686   merge->nsend = 0;
4687   for (proc=0; proc<size; proc++) {
4688     len_si[proc] = 0;
4689     if (proc == rank) {
4690       len_s[proc] = 0;
4691     } else {
4692       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4693       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4694     }
4695     if (len_s[proc]) {
4696       merge->nsend++;
4697       nrows = 0;
4698       for (i=owners[proc]; i<owners[proc+1]; i++) {
4699         if (ai[i+1] > ai[i]) nrows++;
4700       }
4701       len_si[proc] = 2*(nrows+1);
4702       len         += len_si[proc];
4703     }
4704   }
4705 
4706   /* determine the number and length of messages to receive for ij-structure */
4707   /*-------------------------------------------------------------------------*/
4708   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4709   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4710 
4711   /* post the Irecv of j-structure */
4712   /*-------------------------------*/
4713   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4714   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4715 
4716   /* post the Isend of j-structure */
4717   /*--------------------------------*/
4718   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4719 
4720   for (proc=0, k=0; proc<size; proc++) {
4721     if (!len_s[proc]) continue;
4722     i    = owners[proc];
4723     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4724     k++;
4725   }
4726 
4727   /* receives and sends of j-structure are complete */
4728   /*------------------------------------------------*/
4729   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4730   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4731 
4732   /* send and recv i-structure */
4733   /*---------------------------*/
4734   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4735   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4736 
4737   ierr   = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr);
4738   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4739   for (proc=0,k=0; proc<size; proc++) {
4740     if (!len_s[proc]) continue;
4741     /* form outgoing message for i-structure:
4742          buf_si[0]:                 nrows to be sent
4743                [1:nrows]:           row index (global)
4744                [nrows+1:2*nrows+1]: i-structure index
4745     */
4746     /*-------------------------------------------*/
4747     nrows       = len_si[proc]/2 - 1;
4748     buf_si_i    = buf_si + nrows+1;
4749     buf_si[0]   = nrows;
4750     buf_si_i[0] = 0;
4751     nrows       = 0;
4752     for (i=owners[proc]; i<owners[proc+1]; i++) {
4753       anzi = ai[i+1] - ai[i];
4754       if (anzi) {
4755         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4756         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4757         nrows++;
4758       }
4759     }
4760     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4761     k++;
4762     buf_si += len_si[proc];
4763   }
4764 
4765   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4766   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4767 
4768   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4769   for (i=0; i<merge->nrecv; i++) {
4770     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4771   }
4772 
4773   ierr = PetscFree(len_si);CHKERRQ(ierr);
4774   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4775   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4776   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4777   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4778   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4779   ierr = PetscFree(status);CHKERRQ(ierr);
4780 
4781   /* compute a local seq matrix in each processor */
4782   /*----------------------------------------------*/
4783   /* allocate bi array and free space for accumulating nonzero column info */
4784   ierr  = PetscMalloc1((m+1),&bi);CHKERRQ(ierr);
4785   bi[0] = 0;
4786 
4787   /* create and initialize a linked list */
4788   nlnk = N+1;
4789   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4790 
4791   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4792   len  = ai[owners[rank+1]] - ai[owners[rank]];
4793   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4794 
4795   current_space = free_space;
4796 
4797   /* determine symbolic info for each local row */
4798   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4799 
4800   for (k=0; k<merge->nrecv; k++) {
4801     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4802     nrows       = *buf_ri_k[k];
4803     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4804     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4805   }
4806 
4807   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4808   len  = 0;
4809   for (i=0; i<m; i++) {
4810     bnzi = 0;
4811     /* add local non-zero cols of this proc's seqmat into lnk */
4812     arow  = owners[rank] + i;
4813     anzi  = ai[arow+1] - ai[arow];
4814     aj    = a->j + ai[arow];
4815     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4816     bnzi += nlnk;
4817     /* add received col data into lnk */
4818     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4819       if (i == *nextrow[k]) { /* i-th row */
4820         anzi  = *(nextai[k]+1) - *nextai[k];
4821         aj    = buf_rj[k] + *nextai[k];
4822         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4823         bnzi += nlnk;
4824         nextrow[k]++; nextai[k]++;
4825       }
4826     }
4827     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4828 
4829     /* if free space is not available, make more free space */
4830     if (current_space->local_remaining<bnzi) {
4831       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4832       nspacedouble++;
4833     }
4834     /* copy data into free space, then initialize lnk */
4835     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4836     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4837 
4838     current_space->array           += bnzi;
4839     current_space->local_used      += bnzi;
4840     current_space->local_remaining -= bnzi;
4841 
4842     bi[i+1] = bi[i] + bnzi;
4843   }
4844 
4845   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4846 
4847   ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
4848   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4849   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4850 
4851   /* create symbolic parallel matrix B_mpi */
4852   /*---------------------------------------*/
4853   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4854   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4855   if (n==PETSC_DECIDE) {
4856     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4857   } else {
4858     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4859   }
4860   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4861   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4862   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4863   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4864   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4865 
4866   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4867   B_mpi->assembled    = PETSC_FALSE;
4868   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4869   merge->bi           = bi;
4870   merge->bj           = bj;
4871   merge->buf_ri       = buf_ri;
4872   merge->buf_rj       = buf_rj;
4873   merge->coi          = NULL;
4874   merge->coj          = NULL;
4875   merge->owners_co    = NULL;
4876 
4877   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4878 
4879   /* attach the supporting struct to B_mpi for reuse */
4880   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4881   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4882   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4883   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4884   *mpimat = B_mpi;
4885 
4886   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4887   PetscFunctionReturn(0);
4888 }
4889 
4890 #undef __FUNCT__
4891 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4892 /*@C
4893       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding sequential
4894                  matrices from each processor
4895 
4896     Collective on MPI_Comm
4897 
4898    Input Parameters:
4899 +    comm - the communicator the parallel matrix will live on
4900 .    seqmat - the input sequential matrix (one per process)
4901 .    m - number of local rows (or PETSC_DECIDE)
4902 .    n - number of local columns (or PETSC_DECIDE)
4903 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4904 
4905    Output Parameter:
4906 .    mpimat - the parallel matrix generated
4907 
4908     Level: advanced
4909 
4910    Notes:
4911      The dimensions of the sequential matrix in each processor MUST be the same.
4912      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4913      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
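
     A typical use is sketched below; a second call with MAT_REUSE_MATRIX reuses the symbolic
     structure when only the numerical values of seqmat have changed:

.vb
     Mat C;
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     /* ... change the entries of seqmat ... */
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
.ve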
4914 @*/
4915 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4916 {
4917   PetscErrorCode ierr;
4918   PetscMPIInt    size;
4919 
4920   PetscFunctionBegin;
4921   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4922   if (size == 1) {
4923     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4924     if (scall == MAT_INITIAL_MATRIX) {
4925       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4926     } else {
4927       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4928     }
4929     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4930     PetscFunctionReturn(0);
4931   }
4932   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4933   if (scall == MAT_INITIAL_MATRIX) {
4934     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4935   }
4936   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4937   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4938   PetscFunctionReturn(0);
4939 }
4940 
4941 #undef __FUNCT__
4942 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4943 /*@
4944      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4945           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4946           with MatGetSize()
4947 
4948     Not Collective
4949 
4950    Input Parameters:
4951 +    A - the matrix
4952 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4953 
4954    Output Parameter:
4955 .    A_loc - the local sequential matrix generated
4956 
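    A typical use is sketched below:

.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     /* ... use A_loc as an ordinary SeqAIJ matrix ... */
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
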
4957     Level: developer
4958 
4959 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4960 
4961 @*/
4962 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4963 {
4964   PetscErrorCode ierr;
4965   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4966   Mat_SeqAIJ     *mat,*a,*b;
4967   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4968   MatScalar      *aa,*ba,*cam;
4969   PetscScalar    *ca;
4970   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4971   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4972   PetscBool      match;
4973 
4974   PetscFunctionBegin;
4975   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4976   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4977   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4978   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4979   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4980   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4981   aa = a->a; ba = b->a;
4982   if (scall == MAT_INITIAL_MATRIX) {
4983     ierr  = PetscMalloc1((1+am),&ci);CHKERRQ(ierr);
4984     ci[0] = 0;
4985     for (i=0; i<am; i++) {
4986       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4987     }
4988     ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr);
4989     ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr);
4990     k    = 0;
4991     for (i=0; i<am; i++) {
4992       ncols_o = bi[i+1] - bi[i];
4993       ncols_d = ai[i+1] - ai[i];
4994       /* off-diagonal portion of A */
4995       for (jo=0; jo<ncols_o; jo++) {
4996         col = cmap[*bj];
4997         if (col >= cstart) break;
4998         cj[k]   = col; bj++;
4999         ca[k++] = *ba++;
5000       }
5001       /* diagonal portion of A */
5002       for (j=0; j<ncols_d; j++) {
5003         cj[k]   = cstart + *aj++;
5004         ca[k++] = *aa++;
5005       }
5006       /* off-diagonal portion of A */
5007       for (j=jo; j<ncols_o; j++) {
5008         cj[k]   = cmap[*bj++];
5009         ca[k++] = *ba++;
5010       }
5011     }
5012     /* put together the new matrix */
5013     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5014     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5015     /* Since these are PETSc arrays, change flags to free them as necessary. */
5016     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5017     mat->free_a  = PETSC_TRUE;
5018     mat->free_ij = PETSC_TRUE;
5019     mat->nonew   = 0;
5020   } else if (scall == MAT_REUSE_MATRIX) {
5021     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5022     ci = mat->i; cj = mat->j; cam = mat->a;
5023     for (i=0; i<am; i++) {
5024       /* off-diagonal portion of A */
5025       ncols_o = bi[i+1] - bi[i];
5026       for (jo=0; jo<ncols_o; jo++) {
5027         col = cmap[*bj];
5028         if (col >= cstart) break;
5029         *cam++ = *ba++; bj++;
5030       }
5031       /* diagonal portion of A */
5032       ncols_d = ai[i+1] - ai[i];
5033       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5034       /* off-diagonal portion of A */
5035       for (j=jo; j<ncols_o; j++) {
5036         *cam++ = *ba++; bj++;
5037       }
5038     }
5039   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5040   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5041   PetscFunctionReturn(0);
5042 }
5043 
5044 #undef __FUNCT__
5045 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5046 /*@C
5047      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5048 
5049     Not Collective
5050 
5051    Input Parameters:
5052 +    A - the matrix
5053 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5054 -    row, col - index sets of rows and columns to extract (or NULL)
5055 
5056    Output Parameter:
5057 .    A_loc - the local sequential matrix generated
5058 
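    A typical use is sketched below (passing NULL for the row and column index sets selects
    all local rows and all nonzero columns):

.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
.ve
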
5059     Level: developer
5060 
5061 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5062 
5063 @*/
5064 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5065 {
5066   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5067   PetscErrorCode ierr;
5068   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5069   IS             isrowa,iscola;
5070   Mat            *aloc;
5071   PetscBool      match;
5072 
5073   PetscFunctionBegin;
5074   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5075   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5076   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5077   if (!row) {
5078     start = A->rmap->rstart; end = A->rmap->rend;
5079     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5080   } else {
5081     isrowa = *row;
5082   }
5083   if (!col) {
5084     start = A->cmap->rstart;
5085     cmap  = a->garray;
5086     nzA   = a->A->cmap->n;
5087     nzB   = a->B->cmap->n;
5088     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5089     ncols = 0;
5090     for (i=0; i<nzB; i++) {
5091       if (cmap[i] < start) idx[ncols++] = cmap[i];
5092       else break;
5093     }
5094     imark = i;
5095     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5096     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5097     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5098   } else {
5099     iscola = *col;
5100   }
5101   if (scall != MAT_INITIAL_MATRIX) {
5102     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5103     aloc[0] = *A_loc;
5104   }
5105   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5106   *A_loc = aloc[0];
5107   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5108   if (!row) {
5109     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5110   }
5111   if (!col) {
5112     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5113   }
5114   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5115   PetscFunctionReturn(0);
5116 }
5117 
5118 #undef __FUNCT__
5119 #define __FUNCT__ "MatGetBrowsOfAcols"
5120 /*@C
5121     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5122 
5123     Collective on Mat
5124 
5125    Input Parameters:
5126 +    A,B - the matrices in mpiaij format
5127 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5128 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5129 
5130    Output Parameter:
5131 +    rowb, colb - index sets of rows and columns of B to extract
5132 -    B_seq - the sequential matrix generated
5133 
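   Example usage (a minimal sketch; A and B are assumed to be assembled MPIAIJ matrices
   with compatible layouts, and error handling beyond CHKERRQ() is omitted):

.vb
      IS  rowb = NULL,colb = NULL;
      Mat B_seq;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
      ierr = ISDestroy(&rowb);CHKERRQ(ierr);
      ierr = ISDestroy(&colb);CHKERRQ(ierr);
      ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
.ve
   With MAT_REUSE_MATRIX both index sets must be the ones returned by the
   MAT_INITIAL_MATRIX call, as checked at the top of the routine.
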
5134     Level: developer
5135 
5136 @*/
5137 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5138 {
5139   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5140   PetscErrorCode ierr;
5141   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5142   IS             isrowb,iscolb;
5143   Mat            *bseq=NULL;
5144 
5145   PetscFunctionBegin;
5146   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5147     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5148   }
5149   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5150 
5151   if (scall == MAT_INITIAL_MATRIX) {
5152     start = A->cmap->rstart;
5153     cmap  = a->garray;
5154     nzA   = a->A->cmap->n;
5155     nzB   = a->B->cmap->n;
5156     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5157     ncols = 0;
5158     for (i=0; i<nzB; i++) {  /* row < local row index */
5159       if (cmap[i] < start) idx[ncols++] = cmap[i];
5160       else break;
5161     }
5162     imark = i;
5163     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5164     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5165     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5166     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5167   } else {
5168     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5169     isrowb  = *rowb; iscolb = *colb;
5170     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5171     bseq[0] = *B_seq;
5172   }
5173   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5174   *B_seq = bseq[0];
5175   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5176   if (!rowb) {
5177     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5178   } else {
5179     *rowb = isrowb;
5180   }
5181   if (!colb) {
5182     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5183   } else {
5184     *colb = iscolb;
5185   }
5186   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5187   PetscFunctionReturn(0);
5188 }
5189 
5190 #undef __FUNCT__
5191 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5192 /*
5193     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero
5194     columns of the OFF-DIAGONAL portion of local A
5195 
5196     Collective on Mat
5197 
5198    Input Parameters:
5199 +    A,B - the matrices in mpiaij format
5200 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5201 
5202    Output Parameters:
5203 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5204 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5205 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5206 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5207 
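   Example usage (a minimal sketch for an internal caller; A and B are assumed to be
   assembled MPIAIJ matrices with compatible layouts, and error handling beyond
   CHKERRQ() is omitted):

      PetscInt  *startsj_s = NULL,*startsj_r = NULL;
      MatScalar *bufa = NULL;
      Mat       B_oth;
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
      ierr = MatDestroy(&B_oth);CHKERRQ(ierr);

   The second call reuses the communication layout saved in startsj_s, startsj_r and
   bufa, and only re-sends the numerical values of B.
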
5208     Level: developer
5209 
5210 */
5211 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5212 {
5213   VecScatter_MPI_General *gen_to,*gen_from;
5214   PetscErrorCode         ierr;
5215   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5216   Mat_SeqAIJ             *b_oth;
5217   VecScatter             ctx =a->Mvctx;
5218   MPI_Comm               comm;
5219   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5220   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5221   PetscScalar            *rvalues,*svalues;
5222   MatScalar              *b_otha,*bufa,*bufA;
5223   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5224   MPI_Request            *rwaits = NULL,*swaits = NULL;
5225   MPI_Status             *sstatus,rstatus;
5226   PetscMPIInt            jj;
5227   PetscInt               *cols,sbs,rbs;
5228   PetscScalar            *vals;
5229 
5230   PetscFunctionBegin;
5231   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5232   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5233     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5234   }
5235   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5236   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5237 
5238   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5239   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5240   rvalues  = gen_from->values; /* holds the length of receiving row */
5241   svalues  = gen_to->values;   /* holds the length of sending row */
5242   nrecvs   = gen_from->n;
5243   nsends   = gen_to->n;
5244 
5245   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5246   srow    = gen_to->indices;    /* local row index to be sent */
5247   sstarts = gen_to->starts;
5248   sprocs  = gen_to->procs;
5249   sstatus = gen_to->sstatus;
5250   sbs     = gen_to->bs;
5251   rstarts = gen_from->starts;
5252   rprocs  = gen_from->procs;
5253   rbs     = gen_from->bs;
5254 
5255   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5256   if (scall == MAT_INITIAL_MATRIX) {
5257     /* i-array */
5258     /*---------*/
5259     /*  post receives */
5260     for (i=0; i<nrecvs; i++) {
5261       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5262       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5263       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5264     }
5265 
5266     /* pack the outgoing message */
5267     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5268 
5269     sstartsj[0] = 0;
5270     rstartsj[0] = 0;
5271     len         = 0; /* total length of j or a array to be sent */
5272     k           = 0;
5273     for (i=0; i<nsends; i++) {
5274       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5275       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5276       for (j=0; j<nrows; j++) {
5277         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5278         for (l=0; l<sbs; l++) {
5279           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5280 
5281           rowlen[j*sbs+l] = ncols;
5282 
5283           len += ncols;
5284           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5285         }
5286         k++;
5287       }
5288       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5289 
5290       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5291     }
5292     /* recvs and sends of i-array are completed */
5293     i = nrecvs;
5294     while (i--) {
5295       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5296     }
5297     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5298 
5299     /* allocate buffers for sending j and a arrays */
5300     ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
5301     ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);
5302 
5303     /* create i-array of B_oth */
5304     ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);
5305 
5306     b_othi[0] = 0;
5307     len       = 0; /* total length of j or a array to be received */
5308     k         = 0;
5309     for (i=0; i<nrecvs; i++) {
5310       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5311       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5312       for (j=0; j<nrows; j++) {
5313         b_othi[k+1] = b_othi[k] + rowlen[j];
5314         len        += rowlen[j]; k++;
5315       }
5316       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5317     }
5318 
5319     /* allocate space for j and a arrays of B_oth */
5320     ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
5321     ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);
5322 
5323     /* j-array */
5324     /*---------*/
5325     /*  post receives of j-array */
5326     for (i=0; i<nrecvs; i++) {
5327       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5328       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5329     }
5330 
5331     /* pack the outgoing message j-array */
5332     k = 0;
5333     for (i=0; i<nsends; i++) {
5334       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5335       bufJ  = bufj+sstartsj[i];
5336       for (j=0; j<nrows; j++) {
5337         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5338         for (ll=0; ll<sbs; ll++) {
5339           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5340           for (l=0; l<ncols; l++) {
5341             *bufJ++ = cols[l];
5342           }
5343           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5344         }
5345       }
5346       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5347     }
5348 
5349     /* recvs and sends of j-array are completed */
5350     i = nrecvs;
5351     while (i--) {
5352       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5353     }
5354     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5355   } else if (scall == MAT_REUSE_MATRIX) {
5356     sstartsj = *startsj_s;
5357     rstartsj = *startsj_r;
5358     bufa     = *bufa_ptr;
5359     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5360     b_otha   = b_oth->a;
5361   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5362 
5363   /* a-array */
5364   /*---------*/
5365   /*  post receives of a-array */
5366   for (i=0; i<nrecvs; i++) {
5367     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5368     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5369   }
5370 
5371   /* pack the outgoing message a-array */
5372   k = 0;
5373   for (i=0; i<nsends; i++) {
5374     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5375     bufA  = bufa+sstartsj[i];
5376     for (j=0; j<nrows; j++) {
5377       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5378       for (ll=0; ll<sbs; ll++) {
5379         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5380         for (l=0; l<ncols; l++) {
5381           *bufA++ = vals[l];
5382         }
5383         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5384       }
5385     }
5386     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5387   }
5388   /* recvs and sends of a-array are completed */
5389   i = nrecvs;
5390   while (i--) {
5391     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5392   }
5393   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5394   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5395 
5396   if (scall == MAT_INITIAL_MATRIX) {
5397     /* put together the new matrix */
5398     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5399 
5400     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5401     /* Since these are PETSc arrays, change flags to free them as necessary. */
5402     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5403     b_oth->free_a  = PETSC_TRUE;
5404     b_oth->free_ij = PETSC_TRUE;
5405     b_oth->nonew   = 0;
5406 
5407     ierr = PetscFree(bufj);CHKERRQ(ierr);
5408     if (!startsj_s || !bufa_ptr) {
5409       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5410       ierr = PetscFree(bufa);CHKERRQ(ierr);
5411     } else {
5412       *startsj_s = sstartsj;
5413       *startsj_r = rstartsj;
5414       *bufa_ptr  = bufa;
5415     }
5416   }
5417   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5418   PetscFunctionReturn(0);
5419 }
5420 
5421 #undef __FUNCT__
5422 #define __FUNCT__ "MatGetCommunicationStructs"
5423 /*@C
5424   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5425 
5426   Not Collective
5427 
5428   Input Parameter:
5429 . A - The matrix in mpiaij format
5430 
5431   Output Parameters:
5432 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5433 . colmap - A map from global column index to local index into lvec
5434 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5435 
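  Example usage (a minimal sketch; A is assumed to be an assembled MPIAIJ matrix and
  PETSC_USE_CTABLE is assumed to be undefined, so colmap is a plain PetscInt array):

.vb
      Vec        lvec;
      PetscInt   *colmap;
      VecScatter Mvctx;
      ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve
  The returned objects are references to data owned by A and must not be destroyed
  by the caller.
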
5436   Level: developer
5437 
5438 @*/
5439 #if defined(PETSC_USE_CTABLE)
5440 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5441 #else
5442 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5443 #endif
5444 {
5445   Mat_MPIAIJ *a;
5446 
5447   PetscFunctionBegin;
5448   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5449   PetscValidPointer(lvec, 2);
5450   PetscValidPointer(colmap, 3);
5451   PetscValidPointer(multScatter, 4);
5452   a = (Mat_MPIAIJ*) A->data;
5453   if (lvec) *lvec = a->lvec;
5454   if (colmap) *colmap = a->colmap;
5455   if (multScatter) *multScatter = a->Mvctx;
5456   PetscFunctionReturn(0);
5457 }
5458 
5459 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5460 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5461 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5462 
5463 #undef __FUNCT__
5464 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5465 /*
5466     Computes (B'*A')' since computing B*A directly is untenable
5467 
5468                n                       p                          p
5469         (              )       (              )         (                  )
5470       m (      A       )  *  n (       B      )   =   m (         C        )
5471         (              )       (              )         (                  )
5472 
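    This routine is normally reached through the public interface, e.g. (a sketch,
    assuming A is MPIDENSE, B is MPIAIJ and A->cmap->n == B->rmap->n):

      Mat C;
      ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
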
5473 */
5474 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5475 {
5476   PetscErrorCode ierr;
5477   Mat            At,Bt,Ct;
5478 
5479   PetscFunctionBegin;
5480   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5481   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5482   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5483   ierr = MatDestroy(&At);CHKERRQ(ierr);
5484   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5485   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5486   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5487   PetscFunctionReturn(0);
5488 }
5489 
5490 #undef __FUNCT__
5491 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5492 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5493 {
5494   PetscErrorCode ierr;
5495   PetscInt       m=A->rmap->n,n=B->cmap->n;
5496   Mat            Cmat;
5497 
5498   PetscFunctionBegin;
5499   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5500   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5501   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5502   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5503   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5504   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5505   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5506   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5507 
5508   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5509 
5510   *C = Cmat;
5511   PetscFunctionReturn(0);
5512 }
5513 
5514 /* ----------------------------------------------------------------*/
5515 #undef __FUNCT__
5516 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5517 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5518 {
5519   PetscErrorCode ierr;
5520 
5521   PetscFunctionBegin;
5522   if (scall == MAT_INITIAL_MATRIX) {
5523     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5524     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5525     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5526   }
5527   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5528   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5529   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5530   PetscFunctionReturn(0);
5531 }
5532 
5533 #if defined(PETSC_HAVE_MUMPS)
5534 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5535 #endif
5536 #if defined(PETSC_HAVE_PASTIX)
5537 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5538 #endif
5539 #if defined(PETSC_HAVE_SUPERLU_DIST)
5540 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5541 #endif
5542 #if defined(PETSC_HAVE_CLIQUE)
5543 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5544 #endif
5545 
5546 /*MC
5547    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5548 
5549    Options Database Keys:
5550 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5551 
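  Example usage (a minimal sketch; comm, the sizes m, n, M, N and the preallocation
  arguments are assumed to be supplied by the caller):
.vb
      Mat A;
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
.ve
  Alternatively, call MatSetFromOptions() after MatCreate() and select the type at
  run time with -mat_type mpiaij.
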
5552   Level: beginner
5553 
5554 .seealso: MatCreateAIJ()
5555 M*/
5556 
5557 #undef __FUNCT__
5558 #define __FUNCT__ "MatCreate_MPIAIJ"
5559 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5560 {
5561   Mat_MPIAIJ     *b;
5562   PetscErrorCode ierr;
5563   PetscMPIInt    size;
5564 
5565   PetscFunctionBegin;
5566   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5567 
5568   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5569   B->data       = (void*)b;
5570   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5571   B->assembled  = PETSC_FALSE;
5572   B->insertmode = NOT_SET_VALUES;
5573   b->size       = size;
5574 
5575   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5576 
5577   /* build cache for off array entries formed */
5578   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5579 
5580   b->donotstash  = PETSC_FALSE;
5581   b->colmap      = 0;
5582   b->garray      = 0;
5583   b->roworiented = PETSC_TRUE;
5584 
5585   /* stuff used for matrix vector multiply */
5586   b->lvec  = NULL;
5587   b->Mvctx = NULL;
5588 
5589   /* stuff for MatGetRow() */
5590   b->rowindices   = 0;
5591   b->rowvalues    = 0;
5592   b->getrowactive = PETSC_FALSE;
5593 
5594   /* flexible pointer used in CUSP/CUSPARSE classes */
5595   b->spptr = NULL;
5596 
5597 #if defined(PETSC_HAVE_MUMPS)
5598   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5599 #endif
5600 #if defined(PETSC_HAVE_PASTIX)
5601   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5602 #endif
5603 #if defined(PETSC_HAVE_SUPERLU_DIST)
5604   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5605 #endif
5606 #if defined(PETSC_HAVE_CLIQUE)
5607   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5608 #endif
5609   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5610   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5611   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5612   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5613   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5614   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5615   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5616   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5617   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5618   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5619   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5620   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5621   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5622   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5623   PetscFunctionReturn(0);
5624 }
5625 
5626 #undef __FUNCT__
5627 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5628 /*@
5629      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5630          and "off-diagonal" parts of the matrix in CSR format.
5631 
5632    Collective on MPI_Comm
5633 
5634    Input Parameters:
5635 +  comm - MPI communicator
5636 .  m - number of local rows (Cannot be PETSC_DECIDE)
5637 .  n - This value should be the same as the local size used in creating the
5638        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5639        calculated if N is given). For square matrices n is almost always m.
5640 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5641 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5642 .   i - row indices for "diagonal" portion of matrix
5643 .   j - column indices
5644 .   a - matrix values
5645 .   oi - row indices for "off-diagonal" portion of matrix
5646 .   oj - column indices
5647 -   oa - matrix values
5648 
5649    Output Parameter:
5650 .   mat - the matrix
5651 
5652    Level: advanced
5653 
5654    Notes:
5655        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5656        must free the arrays once the matrix has been destroyed and not before.
5657 
5658        The i and j indices are 0-based.
5659 
5660        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5661 
5662        This sets local rows and cannot be used to set off-processor values.
5663 
5664        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5665        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5666        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5667        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5668        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5669        communication if it is known that only local entries will be set.
5670 
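   Example usage (a minimal sketch; the arrays i, j, a, oi, oj, oa are assumed to be
   built by the caller as described above and must outlive the matrix):

.vb
      Mat A;
      ierr = MatCreateMPIAIJWithSplitArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
      ierr = MatDestroy(&A);CHKERRQ(ierr);
.ve
   Only after MatDestroy() may the caller free the six arrays, as noted above.
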
5671 .keywords: matrix, aij, compressed row, sparse, parallel
5672 
5673 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5674           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5675 @*/
5676 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5677 {
5678   PetscErrorCode ierr;
5679   Mat_MPIAIJ     *maij;
5680 
5681   PetscFunctionBegin;
5682   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5683   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5684   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5685   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5686   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5687   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5688   maij = (Mat_MPIAIJ*) (*mat)->data;
5689 
5690   (*mat)->preallocated = PETSC_TRUE;
5691 
5692   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5693   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5694 
5695   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5696   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5697 
5698   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5699   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5700   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5701   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5702 
5703   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5704   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5705   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5706   PetscFunctionReturn(0);
5707 }
5708 
5709 /*
5710     Special version for direct calls from Fortran
5711 */
5712 #include <petsc-private/fortranimpl.h>
5713 
5714 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5715 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5716 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5717 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5718 #endif
5719 
5720 /* Change these macros so can be used in void function */
5721 #undef CHKERRQ
5722 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5723 #undef SETERRQ2
5724 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5725 #undef SETERRQ3
5726 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5727 #undef SETERRQ
5728 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5729 
5730 #undef __FUNCT__
5731 #define __FUNCT__ "matsetvaluesmpiaij_"
5732 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5733 {
5734   Mat            mat  = *mmat;
5735   PetscInt       m    = *mm, n = *mn;
5736   InsertMode     addv = *maddv;
5737   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5738   PetscScalar    value;
5739   PetscErrorCode ierr;
5740 
5741   MatCheckPreallocated(mat,1);
5742   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5743 
5744 #if defined(PETSC_USE_DEBUG)
5745   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5746 #endif
5747   {
5748     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5749     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5750     PetscBool roworiented = aij->roworiented;
5751 
5752     /* Some Variables required in the macro */
5753     Mat        A                 = aij->A;
5754     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5755     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5756     MatScalar  *aa               = a->a;
5757     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5758     Mat        B                 = aij->B;
5759     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5760     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5761     MatScalar  *ba               = b->a;
5762 
5763     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5764     PetscInt  nonew = a->nonew;
5765     MatScalar *ap1,*ap2;
5766 
5767     PetscFunctionBegin;
5768     for (i=0; i<m; i++) {
5769       if (im[i] < 0) continue;
5770 #if defined(PETSC_USE_DEBUG)
5771       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5772 #endif
5773       if (im[i] >= rstart && im[i] < rend) {
5774         row      = im[i] - rstart;
5775         lastcol1 = -1;
5776         rp1      = aj + ai[row];
5777         ap1      = aa + ai[row];
5778         rmax1    = aimax[row];
5779         nrow1    = ailen[row];
5780         low1     = 0;
5781         high1    = nrow1;
5782         lastcol2 = -1;
5783         rp2      = bj + bi[row];
5784         ap2      = ba + bi[row];
5785         rmax2    = bimax[row];
5786         nrow2    = bilen[row];
5787         low2     = 0;
5788         high2    = nrow2;
5789 
5790         for (j=0; j<n; j++) {
5791           if (roworiented) value = v[i*n+j];
5792           else value = v[i+j*m];
5793           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5794           if (in[j] >= cstart && in[j] < cend) {
5795             col = in[j] - cstart;
5796             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5797           } else if (in[j] < 0) continue;
5798 #if defined(PETSC_USE_DEBUG)
5799           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5800 #endif
5801           else {
5802             if (mat->was_assembled) {
5803               if (!aij->colmap) {
5804                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5805               }
5806 #if defined(PETSC_USE_CTABLE)
5807               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5808               col--;
5809 #else
5810               col = aij->colmap[in[j]] - 1;
5811 #endif
5812               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5813                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5814                 col  =  in[j];
5815                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5816                 B     = aij->B;
5817                 b     = (Mat_SeqAIJ*)B->data;
5818                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5819                 rp2   = bj + bi[row];
5820                 ap2   = ba + bi[row];
5821                 rmax2 = bimax[row];
5822                 nrow2 = bilen[row];
5823                 low2  = 0;
5824                 high2 = nrow2;
5825                 bm    = aij->B->rmap->n;
5826                 ba    = b->a;
5827               }
5828             } else col = in[j];
5829             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5830           }
5831         }
5832       } else if (!aij->donotstash) {
5833         if (roworiented) {
5834           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5835         } else {
5836           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5837         }
5838       }
5839     }
5840   }
5841   PetscFunctionReturnVoid();
5842 }
5843 
5844