/* /petsc/src/mat/impls/aij/mpi/mpiaij.c  (revision a3fa217bfefef3d54995bb82b2b5ea8a3c3e9019) */

#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc-private/vecimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

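   Example (a minimal sketch; the sizes and preallocation counts are illustrative):
.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);
   MatSetType(A,MATAIJ);
   MatSeqAIJSetPreallocation(A,5,NULL);          /* used when comm has one process */
   MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);   /* used when comm has multiple processes */
.ve
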
   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also
   automatically switches over to use inodes when enough exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
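/*
   Determines which locally owned rows are kept, i.e. are not entirely zero.
   Two passes over the local rows: the first counts rows whose diagonal (A)
   and off-diagonal (B) parts are both structurally empty or numerically
   zero; if any rank found such a row, a second pass collects the global
   indices of the remaining rows into *keptrows.
*/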
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
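/*
   Accumulates per-column contributions from the diagonal (A) and off-diagonal
   (B) blocks into a work array of full column length, then combines the
   arrays across ranks with MPI_Allreduce (MPIU_MAX for the infinity norm,
   MPIU_SUM otherwise); for the 2-norm the square root is taken at the end.
*/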
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
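/*
   Usage sketch (hypothetical names; gseq is a MATSEQAIJ matrix that is
   significant only on rank 0, and mloc is the number of rows this rank
   should own):

     Mat dist;
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mloc,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
*/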
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the diagonal and off-diagonal entry counts for each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the diagonal and off-diagonal entry counts for each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer the numerical values into the diagonal (A) and off-diagonal (B) parts of mat */
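    /* ld[i] counts the off-diagonal entries of row i whose global column is
       smaller than rstart, i.e. those that sit to the left of the diagonal
       block; it is used to split each row's contiguous values into
       B (left part), A, and B (right part). */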
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable,
at a slightly higher hash-table cost; without it, it is not scalable (each
process stores an order-N integer array) but access is fast.
*/
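/*
   For example (hypothetical data): if garray = {3,8,11}, the colmap sends
   the global columns 3 -> 1, 8 -> 2, 11 -> 3.  Local indices are stored
   shifted up by one so that a lookup result of 0 means "global column not
   present in the off-diagonal part"; callers subtract one and test for
   col < 0.
*/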
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

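/*
   The two macros below insert (or add) value at (row,col) within one row of
   a SeqAIJ block: a short binary search narrows the range, a linear scan
   finds the column, and if the entry is absent the row is reallocated if
   necessary and the later entries are shifted up to make room.  The _A_
   variant operates on the diagonal block A, the _B_ variant on the
   off-diagonal block B.
*/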
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value;   \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    }  \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col;  \
    ap1[_i] = value;  \
    A->nonzerostate++;\
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col;                                       \
    while (high2-low2 > 5) {                              \
      t = (low2+high2)/2;                                 \
      if (rp2[t] > col) high2 = t;                        \
      else             low2  = t;                         \
    }                                                     \
    for (_i=low2; _i<high2; _i++) {                       \
      if (rp2[_i] > col) break;                           \
      if (rp2[_i] == col) {                               \
        if (addv == ADD_VALUES) ap2[_i] += value;         \
        else                    ap2[_i] = value;          \
        goto b_noinsert;                                  \
      }                                                   \
    }                                                     \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;                    \
    /* shift up all the later entries in this row */      \
    for (ii=N; ii>=_i; ii--) {                            \
      rp2[ii+1] = rp2[ii];                                \
      ap2[ii+1] = ap2[ii];                                \
    }                                                     \
    rp2[_i] = col;                                        \
    ap2[_i] = value;                                      \
    B->nonzerostate++;                                    \
    b_noinsert: ;                                         \
    bilen[row] = nrow2;                                   \
  }

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */
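  /* v is assumed to hold the values of the entire locally owned row, ordered
     by increasing global column; they are copied into B (columns left of the
     diagonal block), then A, then B again (columns right of the diagonal block) */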

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some variables required in the macro */
  Mat        A                 = aij->A;
  Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa               = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B                 = aij->B;
  Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba               = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
        } else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
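            /* After assembly the off-diagonal block B stores compressed local
               column indices, so the global column must be translated through
               colmap; a result of -1 means the column is not yet present, and
               if new nonzeros are allowed the matrix is "disassembled" back to
               global column numbering so the entry can be inserted */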
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERTMODE or ADDMODE */
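  /* A bitwise OR of the InsertMode values across ranks equals
     (ADD_VALUES|INSERT_VALUES) exactly when some ranks inserted while others
     added, which is the erroneous mixed case detected below */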
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted while others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* Determine whether any process has disassembled; if so we must also
     disassemble ourselves, so that we may reassemble. */
  /*
     If the nonzero structure of the submatrix B cannot change, then no
     process disassembled, and this step can be skipped.
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
  PetscInt      *owners = A->rmap->range;
  PetscInt       n      = A->rmap->n;
  PetscMPIInt    size   = mat->size;
  PetscSF        sf;
  PetscInt      *lrows;
  PetscSFNode   *rrows;
  PetscInt       lastidx = -1, r, p = 0, len = 0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    PetscBool      found = PETSC_FALSE;
    /* Trick for an efficient search when the input rows are sorted */
    if (lastidx > idx) p = 0;
    lastidx = idx;
    for (; p < size; ++p) {
      if (idx >= owners[p] && idx < owners[p+1]) {
        rrows[r].rank  = p;
        rrows[r].index = rows[r] - owners[p];
        found = PETSC_TRUE;
        break;
      }
    }
    if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }
  /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
  ierr = MatZeroRows(mat->B, len, lrows, 0.0, 0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,n = A->rmap->n,lastidx = -1;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    PetscBool      found = PETSC_FALSE;
    /* Trick for an efficient search when the input rows are sorted */
    if (lastidx > idx) p = 0;
    lastidx = idx;
    for (; p < size; ++p) {
      if (idx >= owners[p] && idx < owners[p+1]) {
        rrows[r].rank  = p;
        rrows[r].index = rows[r] - owners[p];
        found = PETSC_TRUE;
        break;
      }
    }
    if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle the off-diagonal part of the matrix */
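  /* Build a mask: a global vector with ones in the rows being zeroed is
     scattered (ADD_VALUES) into the ghost ordering, so lmask flags, for each
     local column of B, whether its global column is being eliminated; the
     loops below zero those entries and, when x and b are given, fold the
     eliminated values into the right-hand side */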
  ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of the off-diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
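  /* Overlap communication with computation: start scattering the ghost
     values of xx, multiply by the local diagonal block A while messages are
     in flight, complete the scatter, then add the off-diagonal contribution
     B*lvec into yy */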
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do the non-diagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive the remote parts; note this assumes the values are not
       actually added into yy until the VecScatterEnd() on the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* the values were actually received in the Begin(), but the End() must still be called as a no-op */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This requires a call to MatGetSubMatrices(). */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do the non-diagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
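/*
   Writes the matrix in PETSc binary format in three phases: rank 0 writes
   the header, then gathers and writes the row lengths, the column indices
   (diagonal-block columns merged into global column order), and finally the
   numerical values from each rank in turn, throttled by the viewer's flow
   control to bound memory use on rank 0.
*/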
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* rank 0 needs as much buffer space as the process with the most nonzeros */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1263       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1264       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1265     }
1266     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1267   } else {
1268     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1269     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1270     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1271     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1272   }
1273   ierr = PetscFree(column_values);CHKERRQ(ierr);
1274 
1275   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1276   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1277   PetscFunctionReturn(0);
1278 }
1279 
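/*
   Illustrative usage sketch (not part of the original source): MatView_MPIAIJ_Binary()
   is reached through the generic MatView() when the viewer is a parallel binary viewer.
   A typical caller, with an arbitrary file name, looks like:

      PetscViewer viewer;
      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
      ierr = MatView(A,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

   The matrix can later be read back with MatLoad(); the .info file written at the end
   of the routine above supplies -matload_block_size so the block size is recovered.
*/
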
1280 #include <petscdraw.h>
1281 #undef __FUNCT__
1282 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1283 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1284 {
1285   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1286   PetscErrorCode    ierr;
1287   PetscMPIInt       rank = aij->rank,size = aij->size;
1288   PetscBool         isdraw,iascii,isbinary;
1289   PetscViewer       sviewer;
1290   PetscViewerFormat format;
1291 
1292   PetscFunctionBegin;
1293   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1294   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1295   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1296   if (iascii) {
1297     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1298     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1299       MatInfo   info;
1300       PetscInt  *inodes;
1301 
1302       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1303       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1304       ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr);
1305       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1306       if (!inodes) {
1307         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1308                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1309       } else {
1310         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1311                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1312       }
1313       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1314       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1315       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1316       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1317       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1318       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1319       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1320       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1321       PetscFunctionReturn(0);
1322     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1323       PetscInt inodecount,inodelimit,*inodes;
1324       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1325       if (inodes) {
1326         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1327       } else {
1328         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1329       }
1330       PetscFunctionReturn(0);
1331     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1332       PetscFunctionReturn(0);
1333     }
1334   } else if (isbinary) {
1335     if (size == 1) {
1336       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1337       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1338     } else {
1339       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1340     }
1341     PetscFunctionReturn(0);
1342   } else if (isdraw) {
1343     PetscDraw draw;
1344     PetscBool isnull;
1345     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1346     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1347   }
1348 
1349   if (size == 1) {
1350     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1351     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1352   } else {
1353     /* assemble the entire matrix onto first processor. */
1354     Mat        A;
1355     Mat_SeqAIJ *Aloc;
1356     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1357     MatScalar  *a;
1358 
1359     if (mat->rmap->N > 1024) {
1360       PetscBool flg = PETSC_FALSE;
1361 
1362       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr);
1363       if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1364     }
1365 
1366     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1367     if (!rank) {
1368       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1369     } else {
1370       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1371     }
1372     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1373     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1374     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1375     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1376     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1377 
1378     /* copy over the A part */
1379     Aloc = (Mat_SeqAIJ*)aij->A->data;
1380     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1381     row  = mat->rmap->rstart;
1382     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1383     for (i=0; i<m; i++) {
1384       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1385       row++;
1386       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1387     }
1388     aj = Aloc->j;
1389     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1390 
1391     /* copy over the B part */
1392     Aloc = (Mat_SeqAIJ*)aij->B->data;
1393     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1394     row  = mat->rmap->rstart;
1395     ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr);
1396     ct   = cols;
1397     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1398     for (i=0; i<m; i++) {
1399       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1400       row++;
1401       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1402     }
1403     ierr = PetscFree(ct);CHKERRQ(ierr);
1404     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1405     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1406     /*
1407        Everyone has to call to draw the matrix since the graphics waits are
1408        synchronized across all processors that share the PetscDraw object
1409     */
1410     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1411     if (!rank) {
1412       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1413       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1414       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1415       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1416     }
1417     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1418     ierr = MatDestroy(&A);CHKERRQ(ierr);
1419   }
1420   PetscFunctionReturn(0);
1421 }
1422 
1423 #undef __FUNCT__
1424 #define __FUNCT__ "MatView_MPIAIJ"
1425 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1426 {
1427   PetscErrorCode ierr;
1428   PetscBool      iascii,isdraw,issocket,isbinary;
1429 
1430   PetscFunctionBegin;
1431   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1432   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1433   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1434   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1435   if (iascii || isdraw || isbinary || issocket) {
1436     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1437   }
1438   PetscFunctionReturn(0);
1439 }
1440 
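/*
   Illustrative usage sketch (not part of the original source): the ASCII branches in
   MatView_MPIAIJ_ASCIIorDraworSocket() are selected by the viewer format. To get the
   per-process nonzero summary, a caller can do

      ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
      ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

   or, equivalently, run with -mat_view ::ascii_info_detail.
*/
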
1441 #undef __FUNCT__
1442 #define __FUNCT__ "MatSOR_MPIAIJ"
1443 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1444 {
1445   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1446   PetscErrorCode ierr;
1447   Vec            bb1 = 0;
1448   PetscBool      hasop;
1449 
1450   PetscFunctionBegin;
1451   if (flag == SOR_APPLY_UPPER) {
1452     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1453     PetscFunctionReturn(0);
1454   }
1455 
1456   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1457     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1458   }
1459 
1460   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1461     if (flag & SOR_ZERO_INITIAL_GUESS) {
1462       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1463       its--;
1464     }
1465 
1466     while (its--) {
1467       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1468       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1469 
1470       /* update rhs: bb1 = bb - B*x */
1471       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1472       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1473 
1474       /* local sweep */
1475       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1476     }
1477   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1478     if (flag & SOR_ZERO_INITIAL_GUESS) {
1479       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1480       its--;
1481     }
1482     while (its--) {
1483       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1484       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1485 
1486       /* update rhs: bb1 = bb - B*x */
1487       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1488       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1489 
1490       /* local sweep */
1491       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1492     }
1493   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1494     if (flag & SOR_ZERO_INITIAL_GUESS) {
1495       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1496       its--;
1497     }
1498     while (its--) {
1499       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1500       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1501 
1502       /* update rhs: bb1 = bb - B*x */
1503       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1504       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1505 
1506       /* local sweep */
1507       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1508     }
1509   } else if (flag & SOR_EISENSTAT) {
1510     Vec xx1;
1511 
1512     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1513     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1514 
1515     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1516     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1517     if (!mat->diag) {
1518       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1519       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1520     }
1521     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1522     if (hasop) {
1523       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1524     } else {
1525       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1526     }
1527     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1528 
1529     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1530 
1531     /* local sweep */
1532     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1533     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1534     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1535   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1536 
1537   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1538   PetscFunctionReturn(0);
1539 }
1540 
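/*
   Illustrative usage sketch (not part of the original source): only the SOR_LOCAL_*
   sweeps (process-local SOR, i.e. block Jacobi with SOR on each diagonal block) and
   SOR_EISENSTAT are supported in parallel, so a direct call must request one of those:

      ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);

   The usual route is through the SOR preconditioner, e.g. -pc_type sor -pc_sor_local_symmetric.
*/
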
1541 #undef __FUNCT__
1542 #define __FUNCT__ "MatPermute_MPIAIJ"
1543 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1544 {
1545   Mat            aA,aB,Aperm;
1546   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1547   PetscScalar    *aa,*ba;
1548   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1549   PetscSF        rowsf,sf;
1550   IS             parcolp = NULL;
1551   PetscBool      done;
1552   PetscErrorCode ierr;
1553 
1554   PetscFunctionBegin;
1555   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1556   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1557   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1558   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1559 
1560   /* Invert row permutation to find out where my rows should go */
1561   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1562   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1563   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1564   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1565   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1566   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1567 
1568   /* Invert column permutation to find out where my columns should go */
1569   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1570   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1571   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1572   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1573   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1574   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1575   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1576 
1577   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1578   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1579   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1580 
1581   /* Find out where my gcols should go */
1582   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1583   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1584   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1585   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1586   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1587   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1588   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1589   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1590 
1591   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1592   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1593   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1594   for (i=0; i<m; i++) {
1595     PetscInt row = rdest[i],rowner;
1596     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1597     for (j=ai[i]; j<ai[i+1]; j++) {
1598       PetscInt cowner,col = cdest[aj[j]];
1599       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1600       if (rowner == cowner) dnnz[i]++;
1601       else onnz[i]++;
1602     }
1603     for (j=bi[i]; j<bi[i+1]; j++) {
1604       PetscInt cowner,col = gcdest[bj[j]];
1605       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1606       if (rowner == cowner) dnnz[i]++;
1607       else onnz[i]++;
1608     }
1609   }
1610   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1611   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1612   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1613   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1614   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1615 
1616   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1617   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1618   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1619   for (i=0; i<m; i++) {
1620     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1621     PetscInt j0,rowlen;
1622     rowlen = ai[i+1] - ai[i];
1623     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1624       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1625       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1626     }
1627     rowlen = bi[i+1] - bi[i];
1628     for (j0=j=0; j<rowlen; j0=j) {
1629       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1630       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1631     }
1632   }
1633   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1634   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1635   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1636   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1637   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1638   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1639   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1640   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1641   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1642   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1643   *B = Aperm;
1644   PetscFunctionReturn(0);
1645 }
1646 
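/*
   Illustrative usage sketch (not part of the original source): MatPermute() returns a
   new matrix with rows and columns reordered by the given parallel index sets; each
   process supplies its share of the permutation. A trivial identity permutation of a
   matrix with local size m x n could be built as

      IS  rowp,colp;
      Mat B;
      ierr = ISCreateStride(PetscObjectComm((PetscObject)A),m,rstart,1,&rowp);CHKERRQ(ierr);
      ierr = ISCreateStride(PetscObjectComm((PetscObject)A),n,cstart,1,&colp);CHKERRQ(ierr);
      ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);

   where rstart comes from MatGetOwnershipRange() and cstart from MatGetOwnershipRangeColumn().
*/
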
1647 #undef __FUNCT__
1648 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1649 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1650 {
1651   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1652   Mat            A    = mat->A,B = mat->B;
1653   PetscErrorCode ierr;
1654   PetscReal      isend[5],irecv[5];
1655 
1656   PetscFunctionBegin;
1657   info->block_size = 1.0;
1658   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1659 
1660   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1661   isend[3] = info->memory;  isend[4] = info->mallocs;
1662 
1663   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1664 
1665   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1666   isend[3] += info->memory;  isend[4] += info->mallocs;
1667   if (flag == MAT_LOCAL) {
1668     info->nz_used      = isend[0];
1669     info->nz_allocated = isend[1];
1670     info->nz_unneeded  = isend[2];
1671     info->memory       = isend[3];
1672     info->mallocs      = isend[4];
1673   } else if (flag == MAT_GLOBAL_MAX) {
1674     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1675 
1676     info->nz_used      = irecv[0];
1677     info->nz_allocated = irecv[1];
1678     info->nz_unneeded  = irecv[2];
1679     info->memory       = irecv[3];
1680     info->mallocs      = irecv[4];
1681   } else if (flag == MAT_GLOBAL_SUM) {
1682     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1683 
1684     info->nz_used      = irecv[0];
1685     info->nz_allocated = irecv[1];
1686     info->nz_unneeded  = irecv[2];
1687     info->memory       = irecv[3];
1688     info->mallocs      = irecv[4];
1689   }
1690   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1691   info->fill_ratio_needed = 0;
1692   info->factor_mallocs    = 0;
1693   PetscFunctionReturn(0);
1694 }
1695 
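/*
   Illustrative usage sketch (not part of the original source): MatGetInfo() fills a
   MatInfo struct; the flag selects the local numbers or their global max/sum:

      MatInfo info;
      ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
      ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, nz allocated %g\n",(double)info.nz_used,(double)info.nz_allocated);CHKERRQ(ierr);

   The fill_ratio_* and factor_mallocs fields are zeroed above because this class
   provides no parallel LU/ILU/Cholesky.
*/
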
1696 #undef __FUNCT__
1697 #define __FUNCT__ "MatSetOption_MPIAIJ"
1698 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1699 {
1700   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1701   PetscErrorCode ierr;
1702 
1703   PetscFunctionBegin;
1704   switch (op) {
1705   case MAT_NEW_NONZERO_LOCATIONS:
1706   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1707   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1708   case MAT_KEEP_NONZERO_PATTERN:
1709   case MAT_NEW_NONZERO_LOCATION_ERR:
1710   case MAT_USE_INODES:
1711   case MAT_IGNORE_ZERO_ENTRIES:
1712     MatCheckPreallocated(A,1);
1713     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1714     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1715     break;
1716   case MAT_ROW_ORIENTED:
1717     a->roworiented = flg;
1718 
1719     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1720     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1721     break;
1722   case MAT_NEW_DIAGONALS:
1723     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1724     break;
1725   case MAT_IGNORE_OFF_PROC_ENTRIES:
1726     a->donotstash = flg;
1727     break;
1728   case MAT_SPD:
1729     A->spd_set = PETSC_TRUE;
1730     A->spd     = flg;
1731     if (flg) {
1732       A->symmetric                  = PETSC_TRUE;
1733       A->structurally_symmetric     = PETSC_TRUE;
1734       A->symmetric_set              = PETSC_TRUE;
1735       A->structurally_symmetric_set = PETSC_TRUE;
1736     }
1737     break;
1738   case MAT_SYMMETRIC:
1739     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1740     break;
1741   case MAT_STRUCTURALLY_SYMMETRIC:
1742     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1743     break;
1744   case MAT_HERMITIAN:
1745     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1746     break;
1747   case MAT_SYMMETRY_ETERNAL:
1748     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1749     break;
1750   default:
1751     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1752   }
1753   PetscFunctionReturn(0);
1754 }
1755 
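/*
   Illustrative usage sketch (not part of the original source): options are set through
   the generic MatSetOption(). For example, to drop any values a process generates for
   rows it does not own (avoiding the stash-and-scatter in MatAssemblyBegin/End):

      ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);

   which simply sets a->donotstash in the switch above.
*/
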
1756 #undef __FUNCT__
1757 #define __FUNCT__ "MatGetRow_MPIAIJ"
1758 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1759 {
1760   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1761   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1762   PetscErrorCode ierr;
1763   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1764   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1765   PetscInt       *cmap,*idx_p;
1766 
1767   PetscFunctionBegin;
1768   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1769   mat->getrowactive = PETSC_TRUE;
1770 
1771   if (!mat->rowvalues && (idx || v)) {
1772     /*
1773         allocate enough space to hold information from the longest row.
1774     */
1775     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1776     PetscInt   max = 1,tmp;
1777     for (i=0; i<matin->rmap->n; i++) {
1778       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1779       if (max < tmp) max = tmp;
1780     }
1781     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1782   }
1783 
1784   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1785   lrow = row - rstart;
1786 
1787   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1788   if (!v)   {pvA = 0; pvB = 0;}
1789   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1790   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1791   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1792   nztot = nzA + nzB;
1793 
1794   cmap = mat->garray;
1795   if (v  || idx) {
1796     if (nztot) {
1797       /* Sort by increasing column numbers, assuming A and B already sorted */
1798       PetscInt imark = -1;
1799       if (v) {
1800         *v = v_p = mat->rowvalues;
1801         for (i=0; i<nzB; i++) {
1802           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1803           else break;
1804         }
1805         imark = i;
1806         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1807         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1808       }
1809       if (idx) {
1810         *idx = idx_p = mat->rowindices;
1811         if (imark > -1) {
1812           for (i=0; i<imark; i++) {
1813             idx_p[i] = cmap[cworkB[i]];
1814           }
1815         } else {
1816           for (i=0; i<nzB; i++) {
1817             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1818             else break;
1819           }
1820           imark = i;
1821         }
1822         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1823         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1824       }
1825     } else {
1826       if (idx) *idx = 0;
1827       if (v)   *v   = 0;
1828     }
1829   }
1830   *nz  = nztot;
1831   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1832   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1833   PetscFunctionReturn(0);
1834 }
1835 
1836 #undef __FUNCT__
1837 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1838 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1839 {
1840   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1841 
1842   PetscFunctionBegin;
1843   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1844   aij->getrowactive = PETSC_FALSE;
1845   PetscFunctionReturn(0);
1846 }
1847 
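/*
   Illustrative usage sketch (not part of the original source): MatGetRow()/MatRestoreRow()
   may only be asked for locally owned rows, and only one row may be active at a time
   (enforced by getrowactive above). A typical traversal:

      PetscInt          rstart,rend,row,ncols;
      const PetscInt    *cols;
      const PetscScalar *vals;
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for (row=rstart; row<rend; row++) {
        ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
        ... use the global column indices cols[] and values vals[] ...
        ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
      }
*/
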
1848 #undef __FUNCT__
1849 #define __FUNCT__ "MatNorm_MPIAIJ"
1850 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1851 {
1852   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1853   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1854   PetscErrorCode ierr;
1855   PetscInt       i,j,cstart = mat->cmap->rstart;
1856   PetscReal      sum = 0.0;
1857   MatScalar      *v;
1858 
1859   PetscFunctionBegin;
1860   if (aij->size == 1) {
1861     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1862   } else {
1863     if (type == NORM_FROBENIUS) {
1864       v = amat->a;
1865       for (i=0; i<amat->nz; i++) {
1866         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1867       }
1868       v = bmat->a;
1869       for (i=0; i<bmat->nz; i++) {
1870         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1871       }
1872       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1873       *norm = PetscSqrtReal(*norm);
1874     } else if (type == NORM_1) { /* max column norm */
1875       PetscReal *tmp,*tmp2;
1876       PetscInt  *jj,*garray = aij->garray;
1877       ierr  = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr);
1878       ierr  = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr);
1879       *norm = 0.0;
1880       v     = amat->a; jj = amat->j;
1881       for (j=0; j<amat->nz; j++) {
1882         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1883       }
1884       v = bmat->a; jj = bmat->j;
1885       for (j=0; j<bmat->nz; j++) {
1886         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1887       }
1888       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1889       for (j=0; j<mat->cmap->N; j++) {
1890         if (tmp2[j] > *norm) *norm = tmp2[j];
1891       }
1892       ierr = PetscFree(tmp);CHKERRQ(ierr);
1893       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1894     } else if (type == NORM_INFINITY) { /* max row norm */
1895       PetscReal ntemp = 0.0;
1896       for (j=0; j<aij->A->rmap->n; j++) {
1897         v   = amat->a + amat->i[j];
1898         sum = 0.0;
1899         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1900           sum += PetscAbsScalar(*v); v++;
1901         }
1902         v = bmat->a + bmat->i[j];
1903         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1904           sum += PetscAbsScalar(*v); v++;
1905         }
1906         if (sum > ntemp) ntemp = sum;
1907       }
1908       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1909     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1910   }
1911   PetscFunctionReturn(0);
1912 }
1913 
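/*
   Illustrative usage sketch (not part of the original source): the three supported norms
   are reduced over the communicator, so every process receives the same value:

      PetscReal nrm;
      ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);    sqrt of the sum of |a_ij|^2
      ierr = MatNorm(A,NORM_1,&nrm);CHKERRQ(ierr);            maximum column sum
      ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr);     maximum row sum

   NORM_2 is not supported, as the error at the end of the routine above indicates.
*/
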
1914 #undef __FUNCT__
1915 #define __FUNCT__ "MatTranspose_MPIAIJ"
1916 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1917 {
1918   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1919   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1920   PetscErrorCode ierr;
1921   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1922   PetscInt       cstart = A->cmap->rstart,ncol;
1923   Mat            B;
1924   MatScalar      *array;
1925 
1926   PetscFunctionBegin;
1927   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"In-place transpose requires a square matrix");
1928 
1929   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1930   ai = Aloc->i; aj = Aloc->j;
1931   bi = Bloc->i; bj = Bloc->j;
1932   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1933     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1934     PetscSFNode          *oloc;
1935     PETSC_UNUSED PetscSF sf;
1936 
1937     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1938     /* compute d_nnz for preallocation */
1939     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1940     for (i=0; i<ai[ma]; i++) {
1941       d_nnz[aj[i]]++;
1942       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1943     }
1944     /* compute local off-diagonal contributions */
1945     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1946     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1947     /* map those to global */
1948     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1949     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1950     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1951     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1952     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1953     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1954     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1955 
1956     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1957     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1958     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1959     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1960     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1961     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1962   } else {
1963     B    = *matout;
1964     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1965     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1966   }
1967 
1968   /* copy over the A part */
1969   array = Aloc->a;
1970   row   = A->rmap->rstart;
1971   for (i=0; i<ma; i++) {
1972     ncol = ai[i+1]-ai[i];
1973     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1974     row++;
1975     array += ncol; aj += ncol;
1976   }
1977   aj = Aloc->j;
1978   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
1979 
1980   /* copy over the B part */
1981   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
1982   array = Bloc->a;
1983   row   = A->rmap->rstart;
1984   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1985   cols_tmp = cols;
1986   for (i=0; i<mb; i++) {
1987     ncol = bi[i+1]-bi[i];
1988     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1989     row++;
1990     array += ncol; cols_tmp += ncol;
1991   }
1992   ierr = PetscFree(cols);CHKERRQ(ierr);
1993 
1994   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1995   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1996   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1997     *matout = B;
1998   } else {
1999     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2000   }
2001   PetscFunctionReturn(0);
2002 }
2003 
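/*
   Illustrative usage sketch (not part of the original source): both out-of-place and
   in-place transposes are handled; in-place (MAT_REUSE_MATRIX with *matout == A)
   requires a square matrix, as checked above.

      Mat At;
      ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
      ierr = MatTranspose(A,MAT_REUSE_MATRIX,&At);CHKERRQ(ierr);    reuse At after A's values change
*/
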
2004 #undef __FUNCT__
2005 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2006 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2007 {
2008   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2009   Mat            a    = aij->A,b = aij->B;
2010   PetscErrorCode ierr;
2011   PetscInt       s1,s2,s3;
2012 
2013   PetscFunctionBegin;
2014   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2015   if (rr) {
2016     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2017     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2018     /* Overlap communication with computation. */
2019     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2020   }
2021   if (ll) {
2022     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2023     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2024     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2025   }
2026   /* scale the diagonal block */
2027   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2028 
2029   if (rr) {
2030     /* Do a scatter end and then right scale the off-diagonal block */
2031     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2032     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2033   }
2034   PetscFunctionReturn(0);
2035 }
2036 
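/*
   Illustrative usage sketch (not part of the original source): MatDiagonalScale()
   computes diag(ll)*A*diag(rr); pass NULL to skip one side. The scatter of rr into
   aij->lvec above supplies the ghosted right-vector entries needed to scale the
   off-diagonal block.

      Vec l,r;
      ierr = MatGetVecs(A,&r,&l);CHKERRQ(ierr);    r conforms to the columns, l to the rows
      ... fill l and r ...
      ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
*/
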
2037 #undef __FUNCT__
2038 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2039 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2040 {
2041   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2042   PetscErrorCode ierr;
2043 
2044   PetscFunctionBegin;
2045   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2046   PetscFunctionReturn(0);
2047 }
2048 
2049 #undef __FUNCT__
2050 #define __FUNCT__ "MatEqual_MPIAIJ"
2051 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2052 {
2053   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2054   Mat            a,b,c,d;
2055   PetscBool      flg;
2056   PetscErrorCode ierr;
2057 
2058   PetscFunctionBegin;
2059   a = matA->A; b = matA->B;
2060   c = matB->A; d = matB->B;
2061 
2062   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2063   if (flg) {
2064     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2065   }
2066   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2067   PetscFunctionReturn(0);
2068 }
2069 
2070 #undef __FUNCT__
2071 #define __FUNCT__ "MatCopy_MPIAIJ"
2072 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2073 {
2074   PetscErrorCode ierr;
2075   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2076   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2077 
2078   PetscFunctionBegin;
2079   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2080   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2081     /* because of the column compression in the off-processor part of the matrix a->B,
2082        the number of columns in a->B and b->B may be different, hence we cannot call
2083        the MatCopy() directly on the two parts. If need be, we can provide a more
2084        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2085        then copying the submatrices */
2086     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2087   } else {
2088     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2089     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2090   }
2091   PetscFunctionReturn(0);
2092 }
2093 
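/*
   Illustrative usage sketch (not part of the original source): the fast path above
   requires identical nonzero patterns and copy implementations; otherwise the generic
   MatCopy_Basic() is used. A safe way to obtain a compatible destination is

      Mat B;
      ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&B);CHKERRQ(ierr);
      ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/
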
2094 #undef __FUNCT__
2095 #define __FUNCT__ "MatSetUp_MPIAIJ"
2096 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2097 {
2098   PetscErrorCode ierr;
2099 
2100   PetscFunctionBegin;
2101   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2102   PetscFunctionReturn(0);
2103 }
2104 
2105 #undef __FUNCT__
2106 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2107 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2108 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2109 {
2110   PetscInt       i,m=Y->rmap->N;
2111   Mat_SeqAIJ     *x  = (Mat_SeqAIJ*)X->data;
2112   Mat_SeqAIJ     *y  = (Mat_SeqAIJ*)Y->data;
2113   const PetscInt *xi = x->i,*yi = y->i;
2114 
2115   PetscFunctionBegin;
2116   /* Set the number of nonzeros in the new matrix */
2117   for (i=0; i<m; i++) {
2118     PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2119     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2120     nnz[i] = 0;
2121     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2122       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2123       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2124       nnz[i]++;
2125     }
2126     for (; k<nzy; k++) nnz[i]++;
2127   }
2128   PetscFunctionReturn(0);
2129 }
2130 
2131 #undef __FUNCT__
2132 #define __FUNCT__ "MatAXPY_MPIAIJ"
2133 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2134 {
2135   PetscErrorCode ierr;
2136   PetscInt       i;
2137   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2138   PetscBLASInt   bnz,one=1;
2139   Mat_SeqAIJ     *x,*y;
2140 
2141   PetscFunctionBegin;
2142   if (str == SAME_NONZERO_PATTERN) {
2143     PetscScalar alpha = a;
2144     x    = (Mat_SeqAIJ*)xx->A->data;
2145     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2146     y    = (Mat_SeqAIJ*)yy->A->data;
2147     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2148     x    = (Mat_SeqAIJ*)xx->B->data;
2149     y    = (Mat_SeqAIJ*)yy->B->data;
2150     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2151     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2152     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2153   } else if (str == SUBSET_NONZERO_PATTERN) {
2154     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2155 
2156     x = (Mat_SeqAIJ*)xx->B->data;
2157     y = (Mat_SeqAIJ*)yy->B->data;
2158     if (y->xtoy && y->XtoY != xx->B) {
2159       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2160       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2161     }
2162     if (!y->xtoy) { /* get xtoy */
2163       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2164       y->XtoY = xx->B;
2165       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2166     }
2167     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2168     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2169   } else {
2170     Mat      B;
2171     PetscInt *nnz_d,*nnz_o;
2172     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2173     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2174     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2175     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2176     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2177     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2178     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2179     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2180     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2181     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2182     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2183     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2184     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2185     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2186   }
2187   PetscFunctionReturn(0);
2188 }
2189 
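/*
   Illustrative usage sketch (not part of the original source): MatAXPY() computes
   Y = a*X + Y; the structure flag selects the path above. SAME_NONZERO_PATTERN uses a
   direct BLAS axpy on the stored values, SUBSET_NONZERO_PATTERN builds an x-to-y index
   map, and DIFFERENT_NONZERO_PATTERN preallocates a new matrix and replaces Y:

      ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/
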
2190 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2191 
2192 #undef __FUNCT__
2193 #define __FUNCT__ "MatConjugate_MPIAIJ"
2194 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2195 {
2196 #if defined(PETSC_USE_COMPLEX)
2197   PetscErrorCode ierr;
2198   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2199 
2200   PetscFunctionBegin;
2201   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2202   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2203 #else
2204   PetscFunctionBegin;
2205 #endif
2206   PetscFunctionReturn(0);
2207 }
2208 
2209 #undef __FUNCT__
2210 #define __FUNCT__ "MatRealPart_MPIAIJ"
2211 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2212 {
2213   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2214   PetscErrorCode ierr;
2215 
2216   PetscFunctionBegin;
2217   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2218   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2219   PetscFunctionReturn(0);
2220 }
2221 
2222 #undef __FUNCT__
2223 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2224 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2225 {
2226   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2227   PetscErrorCode ierr;
2228 
2229   PetscFunctionBegin;
2230   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2231   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2232   PetscFunctionReturn(0);
2233 }
2234 
2235 #if defined(PETSC_HAVE_PBGL)
2236 
2237 #include <boost/parallel/mpi/bsp_process_group.hpp>
2238 #include <boost/graph/distributed/ilu_default_graph.hpp>
2239 #include <boost/graph/distributed/ilu_0_block.hpp>
2240 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2241 #include <boost/graph/distributed/petsc/interface.hpp>
2242 #include <boost/multi_array.hpp>
2243 #include <boost/parallel/distributed_property_map.hpp>
2244 
2245 #undef __FUNCT__
2246 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2247 /*
2248   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2249 */
2250 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2251 {
2252   namespace petsc = boost::distributed::petsc;
2253 
2254   namespace graph_dist = boost::graph::distributed;
2255   using boost::graph::distributed::ilu_default::process_group_type;
2256   using boost::graph::ilu_permuted;
2257 
2258   PetscBool      row_identity, col_identity;
2259   PetscContainer c;
2260   PetscInt       m, n, M, N;
2261   PetscErrorCode ierr;
2262 
2263   PetscFunctionBegin;
2264   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2265   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2266   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2267   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2268 
2269   process_group_type pg;
2270   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2271   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2272   lgraph_type& level_graph = *lgraph_p;
2273   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2274 
2275   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2276   ilu_permuted(level_graph);
2277 
2278   /* put together the new matrix */
2279   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2280   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2281   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2282   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2283   ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
2284   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2285   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2286   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2287 
2288   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr);
2289   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2290   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2291   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2292   PetscFunctionReturn(0);
2293 }
2294 
2295 #undef __FUNCT__
2296 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2297 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2298 {
2299   PetscFunctionBegin;
2300   PetscFunctionReturn(0);
2301 }
2302 
2303 #undef __FUNCT__
2304 #define __FUNCT__ "MatSolve_MPIAIJ"
2305 /*
2306   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2307 */
2308 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2309 {
2310   namespace graph_dist = boost::graph::distributed;
2311 
2312   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2313   lgraph_type    *lgraph_p;
2314   PetscContainer c;
2315   PetscErrorCode ierr;
2316 
2317   PetscFunctionBegin;
2318   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2319   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2320   ierr = VecCopy(b, x);CHKERRQ(ierr);
2321 
2322   PetscScalar *array_x;
2323   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2324   PetscInt sx;
2325   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2326 
2327   PetscScalar *array_b;
2328   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2329   PetscInt sb;
2330   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2331 
2332   lgraph_type& level_graph = *lgraph_p;
2333   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2334 
2335   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2336   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2337   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2338 
2339   typedef boost::iterator_property_map<array_ref_type::iterator,
2340                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2341   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2342   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2343 
2344   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2345   PetscFunctionReturn(0);
2346 }
2347 #endif
2348 
2349 #undef __FUNCT__
2350 #define __FUNCT__ "MatDestroy_MatRedundant"
2351 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2352 {
2353   PetscErrorCode ierr;
2354   Mat_Redundant  *redund;
2355   PetscInt       i;
2356   PetscMPIInt    size;
2357 
2358   PetscFunctionBegin;
2359   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2360   if (size == 1) {
2361     Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2362     redund = a->redundant;
2363   } else {
2364     Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2365     redund = a->redundant;
2366   }
2367   if (redund) {
2368     if (redund->matseq) { /* via MatGetSubMatrices()  */
2369       ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
2370       ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
2371       ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
2372       ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
2373     } else {
2374       ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2375       ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2376       ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2377       for (i=0; i<redund->nrecvs; i++) {
2378         ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2379         ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2380       }
2381       ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2382     }
2383 
2384     if (redund->psubcomm) {
2385       ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
2386     }
2387     ierr = redund->Destroy(A);CHKERRQ(ierr);
2388     ierr = PetscFree(redund);CHKERRQ(ierr);
2389   }
2390   PetscFunctionReturn(0);
2391 }
2392 
2393 #undef __FUNCT__
2394 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced"
2395 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2396 {
2397   PetscMPIInt    rank,size;
2398   MPI_Comm       comm;
2399   PetscErrorCode ierr;
2400   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2401   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2402   PetscInt       *rowrange = mat->rmap->range;
2403   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2404   Mat            A = aij->A,B=aij->B,C=*matredundant;
2405   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2406   PetscScalar    *sbuf_a;
2407   PetscInt       nzlocal=a->nz+b->nz;
2408   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2409   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2410   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2411   MatScalar      *aworkA,*aworkB;
2412   PetscScalar    *vals;
2413   PetscMPIInt    tag1,tag2,tag3,imdex;
2414   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2415   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2416   MPI_Status     recv_status,*send_status;
2417   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2418   PetscInt       **rbuf_j=NULL;
2419   PetscScalar    **rbuf_a=NULL;
2420   Mat_Redundant  *redund =NULL;
2421 
2422   PetscFunctionBegin;
2423   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2424   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2425   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2426   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2427   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2428 
2429   if (reuse == MAT_REUSE_MATRIX) {
2430     if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2431     if (subsize == 1) {
2432       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2433       redund = c->redundant;
2434     } else {
2435       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2436       redund = c->redundant;
2437     }
2438     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2439 
2440     nsends    = redund->nsends;
2441     nrecvs    = redund->nrecvs;
2442     send_rank = redund->send_rank;
2443     recv_rank = redund->recv_rank;
2444     sbuf_nz   = redund->sbuf_nz;
2445     rbuf_nz   = redund->rbuf_nz;
2446     sbuf_j    = redund->sbuf_j;
2447     sbuf_a    = redund->sbuf_a;
2448     rbuf_j    = redund->rbuf_j;
2449     rbuf_a    = redund->rbuf_a;
2450   }
2451 
2452   if (reuse == MAT_INITIAL_MATRIX) {
2453     PetscInt    nleftover,np_subcomm;
2454 
2455     /* determine the destination ranks (send_rank), nsends, and nrecvs */
2456     ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr);
2457 
2458     np_subcomm = size/nsubcomm;
2459     nleftover  = size - nsubcomm*np_subcomm;
2460 
2461     /* the block of code below is specific to the INTERLACED subcomm layout */
2462     /* ------------------------------------------------*/
2463     nsends = 0; nrecvs = 0;
2464     for (i=0; i<size; i++) {
2465       if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2466         send_rank[nsends++] = i;
2467         recv_rank[nrecvs++] = i;
2468       }
2469     }
2470     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2471       i = size-nleftover-1;
2472       j = 0;
2473       while (j < nsubcomm - nleftover) {
2474         send_rank[nsends++] = i;
2475         i--; j++;
2476       }
2477     }
2478 
2479     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2480       for (i=0; i<nleftover; i++) {
2481         recv_rank[nrecvs++] = size-nleftover+i;
2482       }
2483     }
2484     /*----------------------------------------------*/
2485 
2486     /* allocate sbuf_j, sbuf_a */
2487     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2488     ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr);
2489     ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr);
2490     /*
2491     ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr);
2492     ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr);
2493      */
2494   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2495 
2496   /* copy mat's local entries into the buffers */
2497   if (reuse == MAT_INITIAL_MATRIX) {
2498     rownz_max = 0;
2499     rptr      = sbuf_j;
2500     cols      = sbuf_j + rend-rstart + 1;
2501     vals      = sbuf_a;
2502     rptr[0]   = 0;
2503     for (i=0; i<rend-rstart; i++) {
2504       row    = i + rstart;
2505       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2506       ncols  = nzA + nzB;
2507       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2508       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2509       /* load the column indices for this row into cols */
2510       lwrite = 0;
2511       for (l=0; l<nzB; l++) {
2512         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2513           vals[lwrite]   = aworkB[l];
2514           cols[lwrite++] = ctmp;
2515         }
2516       }
2517       for (l=0; l<nzA; l++) {
2518         vals[lwrite]   = aworkA[l];
2519         cols[lwrite++] = cstart + cworkA[l];
2520       }
2521       for (l=0; l<nzB; l++) {
2522         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2523           vals[lwrite]   = aworkB[l];
2524           cols[lwrite++] = ctmp;
2525         }
2526       }
2527       vals     += ncols;
2528       cols     += ncols;
2529       rptr[i+1] = rptr[i] + ncols;
2530       if (rownz_max < ncols) rownz_max = ncols;
2531     }
2532     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%D] %D != %D + %D",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2533   } else { /* only copy matrix values into sbuf_a */
2534     rptr    = sbuf_j;
2535     vals    = sbuf_a;
2536     rptr[0] = 0;
2537     for (i=0; i<rend-rstart; i++) {
2538       row    = i + rstart;
2539       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2540       ncols  = nzA + nzB;
2541       cworkB = b->j + b->i[i];
2542       aworkA = a->a + a->i[i];
2543       aworkB = b->a + b->i[i];
2544       lwrite = 0;
2545       for (l=0; l<nzB; l++) {
2546         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2547       }
2548       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2549       for (l=0; l<nzB; l++) {
2550         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2551       }
2552       vals     += ncols;
2553       rptr[i+1] = rptr[i] + ncols;
2554     }
2555   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2556 
2557   /* send nzlocal to others, and recv other's nzlocal */
2558   /*--------------------------------------------------*/
2559   if (reuse == MAT_INITIAL_MATRIX) {
2560     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2561 
2562     s_waits2 = s_waits3 + nsends;
2563     s_waits1 = s_waits2 + nsends;
2564     r_waits1 = s_waits1 + nsends;
2565     r_waits2 = r_waits1 + nrecvs;
2566     r_waits3 = r_waits2 + nrecvs;
2567   } else {
2568     ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2569 
2570     r_waits3 = s_waits3 + nsends;
2571   }
2572 
2573   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2574   if (reuse == MAT_INITIAL_MATRIX) {
2575     /* get new tags to keep the communication clean */
2576     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2577     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2578     ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr);
2579 
2580     /* post receives of other's nzlocal */
2581     for (i=0; i<nrecvs; i++) {
2582       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2583     }
2584     /* send nzlocal to others */
2585     for (i=0; i<nsends; i++) {
2586       sbuf_nz[i] = nzlocal;
2587       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2588     }
2589     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2590     count = nrecvs;
2591     while (count) {
2592       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2593 
2594       recv_rank[imdex] = recv_status.MPI_SOURCE;
2595       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2596       ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr);
2597 
2598       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2599 
2600       rbuf_nz[imdex] += i + 2;
2601 
2602       ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr);
2603       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2604       count--;
2605     }
2606     /* wait on sends of nzlocal */
2607     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2608     /* send mat->i,j to others, and recv from other's */
2609     /*------------------------------------------------*/
2610     for (i=0; i<nsends; i++) {
2611       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2612       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2613     }
2614     /* wait on receives of mat->i,j */
2615     /*------------------------------*/
2616     count = nrecvs;
2617     while (count) {
2618       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2619       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2620       count--;
2621     }
2622     /* wait on sends of mat->i,j */
2623     /*---------------------------*/
2624     if (nsends) {
2625       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2626     }
2627   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2628 
2629   /* post receives, send and receive mat->a */
2630   /*----------------------------------------*/
2631   for (imdex=0; imdex<nrecvs; imdex++) {
2632     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2633   }
2634   for (i=0; i<nsends; i++) {
2635     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2636   }
2637   count = nrecvs;
2638   while (count) {
2639     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2640     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2641     count--;
2642   }
2643   if (nsends) {
2644     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2645   }
2646 
2647   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2648 
2649   /* create redundant matrix */
2650   /*-------------------------*/
2651   if (reuse == MAT_INITIAL_MATRIX) {
2652     const PetscInt *range;
2653     PetscInt       rstart_sub,rend_sub,mloc_sub;
2654 
2655     /* compute rownz_max for preallocation */
2656     for (imdex=0; imdex<nrecvs; imdex++) {
2657       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2658       rptr = rbuf_j[imdex];
2659       for (i=0; i<j; i++) {
2660         ncols = rptr[i+1] - rptr[i];
2661         if (rownz_max < ncols) rownz_max = ncols;
2662       }
2663     }
2664 
2665     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2666 
2667     /* get local size of redundant matrix
2668        - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED; it works for other types, but may not be efficient! */
2669     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2670     rstart_sub = range[nsubcomm*subrank];
2671     if (subrank+1 < subsize) { /* not the last proc in subcomm */
2672       rend_sub = range[nsubcomm*(subrank+1)];
2673     } else {
2674       rend_sub = mat->rmap->N;
2675     }
2676     mloc_sub = rend_sub - rstart_sub;
2677 
2678     if (M == N) {
2679       ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2680     } else { /* non-square matrix */
2681       ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
2682     }
2683     ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr);
2684     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2685     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2686     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2687   } else {
2688     C = *matredundant;
2689   }
2690 
2691   /* insert local matrix entries */
2692   rptr = sbuf_j;
2693   cols = sbuf_j + rend-rstart + 1;
2694   vals = sbuf_a;
2695   for (i=0; i<rend-rstart; i++) {
2696     row   = i + rstart;
2697     ncols = rptr[i+1] - rptr[i];
2698     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2699     vals += ncols;
2700     cols += ncols;
2701   }
2702   /* insert received matrix entries */
2703   for (imdex=0; imdex<nrecvs; imdex++) {
2704     rstart = rowrange[recv_rank[imdex]];
2705     rend   = rowrange[recv_rank[imdex]+1];
2706     /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
2707     rptr   = rbuf_j[imdex];
2708     cols   = rbuf_j[imdex] + rend-rstart + 1;
2709     vals   = rbuf_a[imdex];
2710     for (i=0; i<rend-rstart; i++) {
2711       row   = i + rstart;
2712       ncols = rptr[i+1] - rptr[i];
2713       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2714       vals += ncols;
2715       cols += ncols;
2716     }
2717   }
2718   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2719   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2720 
2721   if (reuse == MAT_INITIAL_MATRIX) {
2722     *matredundant = C;
2723 
2724     /* create a supporting struct and attach it to C for reuse */
2725     ierr = PetscNewLog(C,&redund);CHKERRQ(ierr);
2726     if (subsize == 1) {
2727       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2728       c->redundant = redund;
2729     } else {
2730       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2731       c->redundant = redund;
2732     }
2733 
2734     redund->nzlocal   = nzlocal;
2735     redund->nsends    = nsends;
2736     redund->nrecvs    = nrecvs;
2737     redund->send_rank = send_rank;
2738     redund->recv_rank = recv_rank;
2739     redund->sbuf_nz   = sbuf_nz;
2740     redund->rbuf_nz   = rbuf_nz;
2741     redund->sbuf_j    = sbuf_j;
2742     redund->sbuf_a    = sbuf_a;
2743     redund->rbuf_j    = rbuf_j;
2744     redund->rbuf_a    = rbuf_a;
2745     redund->psubcomm  = NULL;
2746 
2747     redund->Destroy = C->ops->destroy;
2748     C->ops->destroy = MatDestroy_MatRedundant;
2749   }
2750   PetscFunctionReturn(0);
2751 }
2752 
2753 #undef __FUNCT__
2754 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2755 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2756 {
2757   PetscErrorCode ierr;
2758   MPI_Comm       comm;
2759   PetscMPIInt    size,subsize;
2760   PetscInt       mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N;
2761   Mat_Redundant  *redund=NULL;
2762   PetscSubcomm   psubcomm=NULL;
2763   MPI_Comm       subcomm_in=subcomm;
2764   Mat            *matseq;
2765   IS             isrow,iscol;
2766 
2767   PetscFunctionBegin;
2768   if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */
2769     if (reuse ==  MAT_INITIAL_MATRIX) {
2770       /* create psubcomm, then get subcomm */
2771       ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2772       ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2773       if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must be between 1 and %D",size);
2774 
2775       ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
2776       ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
2777       ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
2778       ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
2779       subcomm = psubcomm->comm;
2780     } else { /* retrieve psubcomm and subcomm */
2781       ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr);
2782       ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2783       if (subsize == 1) {
2784         Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2785         redund = c->redundant;
2786       } else {
2787         Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2788         redund = c->redundant;
2789       }
2790       psubcomm = redund->psubcomm;
2791     }
2792     if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2793       ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr);
2794       if (reuse ==  MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_MatRedundant() */
2795         ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
2796         if (subsize == 1) {
2797           Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2798           c->redundant->psubcomm = psubcomm;
2799         } else {
2800           Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2801           c->redundant->psubcomm = psubcomm;
2802         }
2803       }
2804       PetscFunctionReturn(0);
2805     }
2806   }
2807 
2808   /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */
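  /* each process of the subcomm extracts its contiguous strip of rows (all N columns) into the
     sequential matrix matseq[0]; the strips are then concatenated into the redundant matrix */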
2809   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2810   if (reuse == MAT_INITIAL_MATRIX) {
2811     /* create a local sequential matrix matseq[0] */
2812     mloc_sub = PETSC_DECIDE;
2813     ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr);
2814     ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
2815     rstart = rend - mloc_sub;
2816     ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
2817     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
2818   } else { /* reuse == MAT_REUSE_MATRIX */
2819     if (subsize == 1) {
2820       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2821       redund = c->redundant;
2822     } else {
2823       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2824       redund = c->redundant;
2825     }
2826 
2827     isrow  = redund->isrow;
2828     iscol  = redund->iscol;
2829     matseq = redund->matseq;
2830   }
2831   ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
2832   ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);
2833 
2834   if (reuse == MAT_INITIAL_MATRIX) {
2835     /* create a supporting struct and attach it to C for reuse */
2836     ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr);
2837     if (subsize == 1) {
2838       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2839       c->redundant = redund;
2840     } else {
2841       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2842       c->redundant = redund;
2843     }
2844     redund->isrow    = isrow;
2845     redund->iscol    = iscol;
2846     redund->matseq   = matseq;
2847     redund->psubcomm = psubcomm;
2848     redund->Destroy               = (*matredundant)->ops->destroy;
2849     (*matredundant)->ops->destroy = MatDestroy_MatRedundant;
2850   }
2851   PetscFunctionReturn(0);
2852 }
2853 
2854 #undef __FUNCT__
2855 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2856 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2857 {
2858   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2859   PetscErrorCode ierr;
2860   PetscInt       i,*idxb = 0;
2861   PetscScalar    *va,*vb;
2862   Vec            vtmp;
2863 
2864   PetscFunctionBegin;
2865   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2866   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2867   if (idx) {
2868     for (i=0; i<A->rmap->n; i++) {
2869       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2870     }
2871   }
2872 
2873   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2874   if (idx) {
2875     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2876   }
2877   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2878   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2879 
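  /* keep the entrywise larger of |va| (diagonal block) and |vb| (off-diagonal block), mapping
     the off-diagonal block's local column indices back to global indices through garray */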
2880   for (i=0; i<A->rmap->n; i++) {
2881     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2882       va[i] = vb[i];
2883       if (idx) idx[i] = a->garray[idxb[i]];
2884     }
2885   }
2886 
2887   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2888   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2889   ierr = PetscFree(idxb);CHKERRQ(ierr);
2890   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2891   PetscFunctionReturn(0);
2892 }
2893 
2894 #undef __FUNCT__
2895 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2896 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2897 {
2898   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2899   PetscErrorCode ierr;
2900   PetscInt       i,*idxb = 0;
2901   PetscScalar    *va,*vb;
2902   Vec            vtmp;
2903 
2904   PetscFunctionBegin;
2905   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2906   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2907   if (idx) {
2908     for (i=0; i<A->rmap->n; i++) {
2909       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2910     }
2911   }
2912 
2913   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2914   if (idx) {
2915     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2916   }
2917   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2918   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2919 
2920   for (i=0; i<A->rmap->n; i++) {
2921     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2922       va[i] = vb[i];
2923       if (idx) idx[i] = a->garray[idxb[i]];
2924     }
2925   }
2926 
2927   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2928   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2929   ierr = PetscFree(idxb);CHKERRQ(ierr);
2930   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2931   PetscFunctionReturn(0);
2932 }
2933 
2934 #undef __FUNCT__
2935 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2936 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2937 {
2938   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2939   PetscInt       n      = A->rmap->n;
2940   PetscInt       cstart = A->cmap->rstart;
2941   PetscInt       *cmap  = mat->garray;
2942   PetscInt       *diagIdx, *offdiagIdx;
2943   Vec            diagV, offdiagV;
2944   PetscScalar    *a, *diagA, *offdiagA;
2945   PetscInt       r;
2946   PetscErrorCode ierr;
2947 
2948   PetscFunctionBegin;
2949   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2950   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2951   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2952   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2953   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2954   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2955   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2956   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2957   for (r = 0; r < n; ++r) {
2958     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2959       a[r]   = diagA[r];
2960       idx[r] = cstart + diagIdx[r];
2961     } else {
2962       a[r]   = offdiagA[r];
2963       idx[r] = cmap[offdiagIdx[r]];
2964     }
2965   }
2966   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2967   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2968   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2969   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2970   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2971   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2972   PetscFunctionReturn(0);
2973 }
2974 
2975 #undef __FUNCT__
2976 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2977 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2978 {
2979   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2980   PetscInt       n      = A->rmap->n;
2981   PetscInt       cstart = A->cmap->rstart;
2982   PetscInt       *cmap  = mat->garray;
2983   PetscInt       *diagIdx, *offdiagIdx;
2984   Vec            diagV, offdiagV;
2985   PetscScalar    *a, *diagA, *offdiagA;
2986   PetscInt       r;
2987   PetscErrorCode ierr;
2988 
2989   PetscFunctionBegin;
2990   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2991   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2992   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2993   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2994   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2995   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2996   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2997   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2998   for (r = 0; r < n; ++r) {
2999     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3000       a[r]   = diagA[r];
3001       idx[r] = cstart + diagIdx[r];
3002     } else {
3003       a[r]   = offdiagA[r];
3004       idx[r] = cmap[offdiagIdx[r]];
3005     }
3006   }
3007   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3008   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3009   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3010   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3011   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3012   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3013   PetscFunctionReturn(0);
3014 }
3015 
3016 #undef __FUNCT__
3017 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3018 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3019 {
3020   PetscErrorCode ierr;
3021   Mat            *dummy;
3022 
3023   PetscFunctionBegin;
3024   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3025   *newmat = *dummy;
3026   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3027   PetscFunctionReturn(0);
3028 }
3029 
3030 #undef __FUNCT__
3031 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3032 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3033 {
3034   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3035   PetscErrorCode ierr;
3036 
3037   PetscFunctionBegin;
3038   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3039   PetscFunctionReturn(0);
3040 }
3041 
3042 #undef __FUNCT__
3043 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3044 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3045 {
3046   PetscErrorCode ierr;
3047   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3048 
3049   PetscFunctionBegin;
3050   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3051   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3052   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3053   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3054   PetscFunctionReturn(0);
3055 }
3056 
3057 /* -------------------------------------------------------------------*/
3058 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3059                                        MatGetRow_MPIAIJ,
3060                                        MatRestoreRow_MPIAIJ,
3061                                        MatMult_MPIAIJ,
3062                                 /* 4*/ MatMultAdd_MPIAIJ,
3063                                        MatMultTranspose_MPIAIJ,
3064                                        MatMultTransposeAdd_MPIAIJ,
3065 #if defined(PETSC_HAVE_PBGL)
3066                                        MatSolve_MPIAIJ,
3067 #else
3068                                        0,
3069 #endif
3070                                        0,
3071                                        0,
3072                                 /*10*/ 0,
3073                                        0,
3074                                        0,
3075                                        MatSOR_MPIAIJ,
3076                                        MatTranspose_MPIAIJ,
3077                                 /*15*/ MatGetInfo_MPIAIJ,
3078                                        MatEqual_MPIAIJ,
3079                                        MatGetDiagonal_MPIAIJ,
3080                                        MatDiagonalScale_MPIAIJ,
3081                                        MatNorm_MPIAIJ,
3082                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3083                                        MatAssemblyEnd_MPIAIJ,
3084                                        MatSetOption_MPIAIJ,
3085                                        MatZeroEntries_MPIAIJ,
3086                                 /*24*/ MatZeroRows_MPIAIJ,
3087                                        0,
3088 #if defined(PETSC_HAVE_PBGL)
3089                                        0,
3090 #else
3091                                        0,
3092 #endif
3093                                        0,
3094                                        0,
3095                                 /*29*/ MatSetUp_MPIAIJ,
3096 #if defined(PETSC_HAVE_PBGL)
3097                                        0,
3098 #else
3099                                        0,
3100 #endif
3101                                        0,
3102                                        0,
3103                                        0,
3104                                 /*34*/ MatDuplicate_MPIAIJ,
3105                                        0,
3106                                        0,
3107                                        0,
3108                                        0,
3109                                 /*39*/ MatAXPY_MPIAIJ,
3110                                        MatGetSubMatrices_MPIAIJ,
3111                                        MatIncreaseOverlap_MPIAIJ,
3112                                        MatGetValues_MPIAIJ,
3113                                        MatCopy_MPIAIJ,
3114                                 /*44*/ MatGetRowMax_MPIAIJ,
3115                                        MatScale_MPIAIJ,
3116                                        0,
3117                                        0,
3118                                        MatZeroRowsColumns_MPIAIJ,
3119                                 /*49*/ MatSetRandom_MPIAIJ,
3120                                        0,
3121                                        0,
3122                                        0,
3123                                        0,
3124                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
3125                                        0,
3126                                        MatSetUnfactored_MPIAIJ,
3127                                        MatPermute_MPIAIJ,
3128                                        0,
3129                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3130                                        MatDestroy_MPIAIJ,
3131                                        MatView_MPIAIJ,
3132                                        0,
3133                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3134                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3135                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3136                                        0,
3137                                        0,
3138                                        0,
3139                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3140                                        MatGetRowMinAbs_MPIAIJ,
3141                                        0,
3142                                        MatSetColoring_MPIAIJ,
3143                                        0,
3144                                        MatSetValuesAdifor_MPIAIJ,
3145                                 /*75*/ MatFDColoringApply_AIJ,
3146                                        0,
3147                                        0,
3148                                        0,
3149                                        MatFindZeroDiagonals_MPIAIJ,
3150                                 /*80*/ 0,
3151                                        0,
3152                                        0,
3153                                 /*83*/ MatLoad_MPIAIJ,
3154                                        0,
3155                                        0,
3156                                        0,
3157                                        0,
3158                                        0,
3159                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3160                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3161                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3162                                        MatPtAP_MPIAIJ_MPIAIJ,
3163                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3164                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3165                                        0,
3166                                        0,
3167                                        0,
3168                                        0,
3169                                 /*99*/ 0,
3170                                        0,
3171                                        0,
3172                                        MatConjugate_MPIAIJ,
3173                                        0,
3174                                 /*104*/MatSetValuesRow_MPIAIJ,
3175                                        MatRealPart_MPIAIJ,
3176                                        MatImaginaryPart_MPIAIJ,
3177                                        0,
3178                                        0,
3179                                 /*109*/0,
3180                                        MatGetRedundantMatrix_MPIAIJ,
3181                                        MatGetRowMin_MPIAIJ,
3182                                        0,
3183                                        0,
3184                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3185                                        0,
3186                                        0,
3187                                        0,
3188                                        0,
3189                                 /*119*/0,
3190                                        0,
3191                                        0,
3192                                        0,
3193                                        MatGetMultiProcBlock_MPIAIJ,
3194                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3195                                        MatGetColumnNorms_MPIAIJ,
3196                                        MatInvertBlockDiagonal_MPIAIJ,
3197                                        0,
3198                                        MatGetSubMatricesParallel_MPIAIJ,
3199                                 /*129*/0,
3200                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3201                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3202                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3203                                        0,
3204                                 /*134*/0,
3205                                        0,
3206                                        0,
3207                                        0,
3208                                        0,
3209                                 /*139*/0,
3210                                        0,
3211                                        0,
3212                                        MatFDColoringSetUp_MPIXAIJ
3213 };
3214 
3215 /* ----------------------------------------------------------------------------------------*/
3216 
3217 #undef __FUNCT__
3218 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3219 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3220 {
3221   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3222   PetscErrorCode ierr;
3223 
3224   PetscFunctionBegin;
3225   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3226   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3227   PetscFunctionReturn(0);
3228 }
3229 
3230 #undef __FUNCT__
3231 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3232 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3233 {
3234   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3235   PetscErrorCode ierr;
3236 
3237   PetscFunctionBegin;
3238   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3239   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3240   PetscFunctionReturn(0);
3241 }
3242 
3243 #undef __FUNCT__
3244 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3245 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3246 {
3247   Mat_MPIAIJ     *b;
3248   PetscErrorCode ierr;
3249 
3250   PetscFunctionBegin;
3251   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3252   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3253   b = (Mat_MPIAIJ*)B->data;
3254 
3255   if (!B->preallocated) {
3256     /* Explicitly create 2 MATSEQAIJ matrices. */
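    /* b->A holds the diagonal block (B->cmap->n local columns); b->B holds the off-diagonal block
       and is created with all B->cmap->N columns, which are compacted to the columns actually
       referenced (recorded in garray) during assembly */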
3257     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3258     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3259     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
3260     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3261     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
3262     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3263     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3264     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
3265     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3266     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
3267   }
3268 
3269   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3270   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3271   B->preallocated = PETSC_TRUE;
3272   PetscFunctionReturn(0);
3273 }
3274 
3275 #undef __FUNCT__
3276 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3277 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3278 {
3279   Mat            mat;
3280   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3281   PetscErrorCode ierr;
3282 
3283   PetscFunctionBegin;
3284   *newmat = 0;
3285   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3286   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3287   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
3288   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3289   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3290   a       = (Mat_MPIAIJ*)mat->data;
3291 
3292   mat->factortype   = matin->factortype;
3293   mat->assembled    = PETSC_TRUE;
3294   mat->insertmode   = NOT_SET_VALUES;
3295   mat->preallocated = PETSC_TRUE;
3296 
3297   a->size         = oldmat->size;
3298   a->rank         = oldmat->rank;
3299   a->donotstash   = oldmat->donotstash;
3300   a->roworiented  = oldmat->roworiented;
3301   a->rowindices   = 0;
3302   a->rowvalues    = 0;
3303   a->getrowactive = PETSC_FALSE;
3304 
3305   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3306   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3307 
3308   if (oldmat->colmap) {
3309 #if defined(PETSC_USE_CTABLE)
3310     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3311 #else
3312     ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr);
3313     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3314     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3315 #endif
3316   } else a->colmap = 0;
3317   if (oldmat->garray) {
3318     PetscInt len;
3319     len  = oldmat->B->cmap->n;
3320     ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr);
3321     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3322     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3323   } else a->garray = 0;
3324 
3325   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3326   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3327   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3328   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3329   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3330   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3331   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3332   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3333   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3334   *newmat = mat;
3335   PetscFunctionReturn(0);
3336 }
3337 
3338 
3339 
3340 #undef __FUNCT__
3341 #define __FUNCT__ "MatLoad_MPIAIJ"
3342 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3343 {
3344   PetscScalar    *vals,*svals;
3345   MPI_Comm       comm;
3346   PetscErrorCode ierr;
3347   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3348   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3349   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3350   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3351   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3352   int            fd;
3353   PetscInt       bs = 1;
3354 
3355   PetscFunctionBegin;
3356   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3357   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3358   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3359   if (!rank) {
3360     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3361     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3362     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3363   }
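  /* overall flow: rank 0 reads the row lengths, column indices, and values from the binary file
     and streams each piece to its owning rank; every rank then inserts its rows with
     MatSetValues_MPIAIJ() and assembles */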
3364 
3365   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3366   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3367   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3368 
3369   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3370 
3371   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3372   M    = header[1]; N = header[2];
3373   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3374   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3375   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3376 
3377   /* If global sizes are set, check if they are consistent with that given in the file */
3378   if (sizesset) {
3379     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3380   }
3381   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3382   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3383 
3384   /* determine ownership of all (block) rows */
3385   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3386   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3387   else m = newMat->rmap->n; /* Set by user */
3388 
3389   ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
3390   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3391 
3392   /* First process needs enough room for process with most rows */
3393   if (!rank) {
3394     mmax = rowners[1];
3395     for (i=2; i<=size; i++) {
3396       mmax = PetscMax(mmax, rowners[i]);
3397     }
3398   } else mmax = -1;             /* unused, but compilers complain */
3399 
3400   rowners[0] = 0;
3401   for (i=2; i<=size; i++) {
3402     rowners[i] += rowners[i-1];
3403   }
3404   rstart = rowners[rank];
3405   rend   = rowners[rank+1];
3406 
3407   /* distribute row lengths to all processors */
3408   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
3409   if (!rank) {
3410     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3411     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3412     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3413     for (j=0; j<m; j++) {
3414       procsnz[0] += ourlens[j];
3415     }
3416     for (i=1; i<size; i++) {
3417       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3418       /* calculate the number of nonzeros on each processor */
3419       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3420         procsnz[i] += rowlengths[j];
3421       }
3422       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3423     }
3424     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3425   } else {
3426     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3427   }
3428 
3429   if (!rank) {
3430     /* determine max buffer needed and allocate it */
3431     maxnz = 0;
3432     for (i=0; i<size; i++) {
3433       maxnz = PetscMax(maxnz,procsnz[i]);
3434     }
3435     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3436 
3437     /* read in my part of the matrix column indices  */
3438     nz   = procsnz[0];
3439     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3440     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3441 
3442     /* read in everyone else's and ship off */
3443     for (i=1; i<size; i++) {
3444       nz   = procsnz[i];
3445       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3446       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3447     }
3448     ierr = PetscFree(cols);CHKERRQ(ierr);
3449   } else {
3450     /* determine buffer space needed for message */
3451     nz = 0;
3452     for (i=0; i<m; i++) {
3453       nz += ourlens[i];
3454     }
3455     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3456 
3457     /* receive message of column indices*/
3458     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3459   }
3460 
3461   /* determine column ownership if matrix is not square */
3462   if (N != M) {
3463     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3464     else n = newMat->cmap->n;
3465     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3466     cstart = cend - n;
3467   } else {
3468     cstart = rstart;
3469     cend   = rend;
3470     n      = cend - cstart;
3471   }
3472 
3473   /* loop over local rows, determining number of off diagonal entries */
3474   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3475   jj   = 0;
3476   for (i=0; i<m; i++) {
3477     for (j=0; j<ourlens[i]; j++) {
3478       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3479       jj++;
3480     }
3481   }
3482 
3483   for (i=0; i<m; i++) {
3484     ourlens[i] -= offlens[i];
3485   }
3486   if (!sizesset) {
3487     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3488   }
3489 
3490   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3491 
3492   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3493 
3494   for (i=0; i<m; i++) {
3495     ourlens[i] += offlens[i];
3496   }
3497 
3498   if (!rank) {
3499     ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr);
3500 
3501     /* read in my part of the matrix numerical values  */
3502     nz   = procsnz[0];
3503     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3504 
3505     /* insert into matrix */
3506     jj      = rstart;
3507     smycols = mycols;
3508     svals   = vals;
3509     for (i=0; i<m; i++) {
3510       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3511       smycols += ourlens[i];
3512       svals   += ourlens[i];
3513       jj++;
3514     }
3515 
3516     /* read in other processors and ship out */
3517     for (i=1; i<size; i++) {
3518       nz   = procsnz[i];
3519       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3520       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3521     }
3522     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3523   } else {
3524     /* receive numeric values */
3525     ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);
3526 
3527     /* receive message of values*/
3528     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3529 
3530     /* insert into matrix */
3531     jj      = rstart;
3532     smycols = mycols;
3533     svals   = vals;
3534     for (i=0; i<m; i++) {
3535       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3536       smycols += ourlens[i];
3537       svals   += ourlens[i];
3538       jj++;
3539     }
3540   }
3541   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3542   ierr = PetscFree(vals);CHKERRQ(ierr);
3543   ierr = PetscFree(mycols);CHKERRQ(ierr);
3544   ierr = PetscFree(rowners);CHKERRQ(ierr);
3545   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3546   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3547   PetscFunctionReturn(0);
3548 }
3549 
3550 #undef __FUNCT__
3551 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3552 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3553 {
3554   PetscErrorCode ierr;
3555   IS             iscol_local;
3556   PetscInt       csize;
3557 
3558   PetscFunctionBegin;
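  /* every process needs the complete (sequential) set of column indices, so iscol is gathered
     once with ISAllGather() and stashed on the new submatrix for MAT_REUSE_MATRIX calls */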
3559   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3560   if (call == MAT_REUSE_MATRIX) {
3561     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3562     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3563   } else {
3564     PetscInt cbs;
3565     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3566     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3567     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3568   }
3569   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3570   if (call == MAT_INITIAL_MATRIX) {
3571     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3572     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3573   }
3574   PetscFunctionReturn(0);
3575 }
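
/*
   Typical calling sequence (an illustrative sketch, not taken from the PETSc examples;
   error checking omitted):

     IS  isrow,iscol;
     Mat sub;
     MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);
     ...
     MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&sub);   reuses the stashed "ISAllGather" IS
*/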
3576 
3577 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3578 #undef __FUNCT__
3579 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3580 /*
3581     Not great since it makes two copies of the submatrix: first a SeqAIJ
3582   locally, then the end result by concatenating the local matrices.
3583   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3584 
3585   Note: This requires a sequential iscol with all indices.
3586 */
3587 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3588 {
3589   PetscErrorCode ierr;
3590   PetscMPIInt    rank,size;
3591   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3592   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3593   PetscBool      allcolumns, colflag;
3594   Mat            M,Mreuse;
3595   MatScalar      *vwork,*aa;
3596   MPI_Comm       comm;
3597   Mat_SeqAIJ     *aij;
3598 
3599   PetscFunctionBegin;
3600   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3601   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3602   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3603 
3604   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3605   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3606   if (colflag && ncol == mat->cmap->N) {
3607     allcolumns = PETSC_TRUE;
3608   } else {
3609     allcolumns = PETSC_FALSE;
3610   }
3611   if (call ==  MAT_REUSE_MATRIX) {
3612     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3613     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3614     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3615   } else {
3616     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3617   }
3618 
3619   /*
3620       m - number of local rows
3621       n - number of columns (same on all processors)
3622       rstart - first row in new global matrix generated
3623   */
3624   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3625   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3626   if (call == MAT_INITIAL_MATRIX) {
3627     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3628     ii  = aij->i;
3629     jj  = aij->j;
3630 
3631     /*
3632         Determine the number of non-zeros in the diagonal and off-diagonal
3633         portions of the matrix in order to do correct preallocation
3634     */
3635 
3636     /* first get start and end of "diagonal" columns */
3637     if (csize == PETSC_DECIDE) {
3638       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3639       if (mglobal == n) { /* square matrix */
3640         nlocal = m;
3641       } else {
3642         nlocal = n/size + ((n % size) > rank);
3643       }
3644     } else {
3645       nlocal = csize;
3646     }
3647     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3648     rstart = rend - nlocal;
3649     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3650 
3651     /* next, compute all the lengths */
3652     ierr  = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr);
3653     olens = dlens + m;
3654     for (i=0; i<m; i++) {
3655       jend = ii[i+1] - ii[i];
3656       olen = 0;
3657       dlen = 0;
3658       for (j=0; j<jend; j++) {
3659         if (*jj < rstart || *jj >= rend) olen++;
3660         else dlen++;
3661         jj++;
3662       }
3663       olens[i] = olen;
3664       dlens[i] = dlen;
3665     }
3666     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3667     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3668     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3669     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3670     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3671     ierr = PetscFree(dlens);CHKERRQ(ierr);
3672   } else {
3673     PetscInt ml,nl;
3674 
3675     M    = *newmat;
3676     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3677     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3678     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3679     /*
3680          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3681        rather than the slower MatSetValues().
3682     */
3683     M->was_assembled = PETSC_TRUE;
3684     M->assembled     = PETSC_FALSE;
3685   }
3686   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3687   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3688   ii   = aij->i;
3689   jj   = aij->j;
3690   aa   = aij->a;
3691   for (i=0; i<m; i++) {
3692     row   = rstart + i;
3693     nz    = ii[i+1] - ii[i];
3694     cwork = jj;     jj += nz;
3695     vwork = aa;     aa += nz;
3696     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3697   }
3698 
3699   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3700   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3701   *newmat = M;
3702 
3703   /* save submatrix used in processor for next request */
3704   if (call ==  MAT_INITIAL_MATRIX) {
3705     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3706     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3707   }
3708   PetscFunctionReturn(0);
3709 }
3710 
3711 #undef __FUNCT__
3712 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3713 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3714 {
3715   PetscInt       m,cstart, cend,j,nnz,i,d;
3716   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3717   const PetscInt *JJ;
3718   PetscScalar    *values;
3719   PetscErrorCode ierr;
3720 
3721   PetscFunctionBegin;
3722   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3723 
3724   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3725   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3726   m      = B->rmap->n;
3727   cstart = B->cmap->rstart;
3728   cend   = B->cmap->rend;
3729   rstart = B->rmap->rstart;
3730 
3731   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3732 
3733 #if defined(PETSC_USE_DEBUG)
3734   for (i=0; i<m; i++) {
3735     nnz = Ii[i+1]- Ii[i];
3736     JJ  = J + Ii[i];
3737     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3738     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3739     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3740   }
3741 #endif
3742 
3743   for (i=0; i<m; i++) {
3744     nnz     = Ii[i+1]- Ii[i];
3745     JJ      = J + Ii[i];
3746     nnz_max = PetscMax(nnz_max,nnz);
3747     d       = 0;
3748     for (j=0; j<nnz; j++) {
3749       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3750     }
3751     d_nnz[i] = d;
3752     o_nnz[i] = nnz - d;
3753   }
3754   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3755   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3756 
3757   if (v) values = (PetscScalar*)v;
3758   else {
3759     ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
3760   }
3761 
3762   for (i=0; i<m; i++) {
3763     ii   = i + rstart;
3764     nnz  = Ii[i+1]- Ii[i];
3765     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3766   }
3767   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3768   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3769 
3770   if (!v) {
3771     ierr = PetscFree(values);CHKERRQ(ierr);
3772   }
3773   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3774   PetscFunctionReturn(0);
3775 }
3776 
3777 #undef __FUNCT__
3778 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3779 /*@
3780    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3781    (the default parallel PETSc format).
3782 
3783    Collective on MPI_Comm
3784 
3785    Input Parameters:
3786 +  B - the matrix
3787 .  i - the indices into j for the start of each local row (starts with zero)
3788 .  j - the column indices for each local row (starts with zero)
3789 -  v - optional values in the matrix
3790 
3791    Level: developer
3792 
3793    Notes:
3794        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3795      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3796      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3797 
3798        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3799 
3800        The format used for the sparse matrix input is equivalent to a
3801     row-major ordering, i.e. for the following matrix the input data expected is
3802     as shown:
3803 
3804         1 0 0
3805         2 0 3     P0
3806        -------
3807         4 5 6     P1
3808 
3809      Process0 [P0]: rows_owned=[0,1]
3810         i =  {0,1,3}  [size = nrow+1  = 2+1]
3811         j =  {0,0,2}  [size = nz = 3]
3812         v =  {1,2,3}  [size = nz = 3]
3813 
3814      Process1 [P1]: rows_owned=[2]
3815         i =  {0,3}    [size = nrow+1  = 1+1]
3816         j =  {0,1,2}  [size = nz = 3]
3817         v =  {4,5,6}  [size = nz = 3]
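
     Sample usage (a minimal sketch of the two-process example above; the variable
     names are illustrative and CHKERRQ() error checking is omitted):

.vb
     Mat         B;
     PetscMPIInt rank;
     PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2};  P0 owns rows 0,1
     PetscScalar v0[] = {1,2,3};
     PetscInt    i1[] = {0,3},   j1[] = {0,1,2};  P1 owns row 2
     PetscScalar v1[] = {4,5,6};

     MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
     MatCreate(PETSC_COMM_WORLD,&B);
     MatSetSizes(B,rank ? 1 : 2,PETSC_DECIDE,3,3);
     MatSetType(B,MATMPIAIJ);
     if (!rank) MatMPIAIJSetPreallocationCSR(B,i0,j0,v0);
     else       MatMPIAIJSetPreallocationCSR(B,i1,j1,v1);
.ve

     The routine inserts the values and assembles B itself; no separate assembly call is needed.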
3818 
3819 .keywords: matrix, aij, compressed row, sparse, parallel
3820 
3821 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3822           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3823 @*/
3824 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3825 {
3826   PetscErrorCode ierr;
3827 
3828   PetscFunctionBegin;
3829   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3830   PetscFunctionReturn(0);
3831 }
3832 
3833 #undef __FUNCT__
3834 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3835 /*@C
3836    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3837    (the default parallel PETSc format).  For good matrix assembly performance
3838    the user should preallocate the matrix storage by setting the parameters
3839    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3840    performance can be increased by more than a factor of 50.
3841 
3842    Collective on MPI_Comm
3843 
3844    Input Parameters:
3845 +  A - the matrix
3846 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3847            (same value is used for all local rows)
3848 .  d_nnz - array containing the number of nonzeros in the various rows of the
3849            DIAGONAL portion of the local submatrix (possibly different for each row)
3850            or NULL, if d_nz is used to specify the nonzero structure.
3851            The size of this array is equal to the number of local rows, i.e 'm'.
3852            For matrices that will be factored, you must leave room for (and set)
3853            the diagonal entry even if it is zero.
3854 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3855            submatrix (same value is used for all local rows).
3856 -  o_nnz - array containing the number of nonzeros in the various rows of the
3857            OFF-DIAGONAL portion of the local submatrix (possibly different for
3858            each row) or NULL, if o_nz is used to specify the nonzero
3859            structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.
3861 
   If the *_nnz parameter is given, then the *_nz parameter is ignored.
3863 
3864    The AIJ format (also called the Yale sparse matrix format or
3865    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3866    storage.  The stored row and column indices begin with zero.
3867    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3868 
3869    The parallel matrix is partitioned such that the first m0 rows belong to
3870    process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
3872 
   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an m x n matrix.  In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3882 
   If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
3884 
   You can call MatGetInfo() to get information on how effective the preallocation was,
   for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.
3889 
3890    Example usage:
3891 
3892    Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3894    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3895    as follows:
3896 
3897 .vb
3898             1  2  0  |  0  3  0  |  0  4
3899     Proc0   0  5  6  |  7  0  0  |  8  0
3900             9  0 10  | 11  0  0  | 12  0
3901     -------------------------------------
3902            13  0 14  | 15 16 17  |  0  0
3903     Proc1   0 18  0  | 19 20 21  |  0  0
3904             0  0  0  | 22 23  0  | 24  0
3905     -------------------------------------
3906     Proc2  25 26 27  |  0  0 28  | 29  0
3907            30  0  0  | 31 32 33  |  0 34
3908 .ve
3909 
3910    This can be represented as a collection of submatrices as:
3911 
3912 .vb
3913       A B C
3914       D E F
3915       G H I
3916 .ve
3917 
3918    Where the submatrices A,B,C are owned by proc0, D,E,F are
3919    owned by proc1, G,H,I are owned by proc2.
3920 
3921    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3922    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3923    The 'M','N' parameters are 8,8, and have the same values on all procs.
3924 
3925    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3926    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3927    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
3931 
   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
3938 .vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
3942 .ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.
3947 
   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
3951 .vb
3952      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3953      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3954      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3955 .ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.
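
   As a minimal sketch, proc1 of the example above could preallocate as follows
   (B is assumed to already be an MPIAIJ matrix with 3 local rows; the array
   names are illustrative and error checking is omitted):

.vb
     PetscInt d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};
     MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve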
3958 
3959    Level: intermediate
3960 
3961 .keywords: matrix, aij, compressed row, sparse, parallel
3962 
3963 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3964           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3965 @*/
3966 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3967 {
3968   PetscErrorCode ierr;
3969 
3970   PetscFunctionBegin;
3971   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3972   PetscValidType(B,1);
3973   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3974   PetscFunctionReturn(0);
3975 }
3976 
3977 #undef __FUNCT__
3978 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3979 /*@
3980      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3981          CSR format the local rows.
3982 
3983    Collective on MPI_Comm
3984 
3985    Input Parameters:
3986 +  comm - MPI communicator
3987 .  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given). For square matrices n is almost always m.
3991 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3992 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3993 .   i - row indices
3994 .   j - column indices
3995 -   a - matrix values
3996 
3997    Output Parameter:
3998 .   mat - the matrix
3999 
4000    Level: intermediate
4001 
4002    Notes:
4003        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4004      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4005      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4006 
       The i and j indices are 0-based, and the i indices are offsets into the local j array.
4008 
       The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
    as shown:
4012 
4013         1 0 0
4014         2 0 3     P0
4015        -------
4016         4 5 6     P1
4017 
     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = nz = 3, the number of local nonzeros]
        a =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        a =  {4,5,6}  [size = nz = 3]
4027 
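     A minimal calling sketch for the example above (each process passes its own
     i, j, a arrays and local row count m, here 2 on P0 and 1 on P1; error
     checking is omitted):

.vb
     Mat mat;
     MatCreateMPIAIJWithArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&mat);
     ... use mat, then ...
     MatDestroy(&mat);
.ve
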
4028 .keywords: matrix, aij, compressed row, sparse, parallel
4029 
4030 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4031           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4032 @*/
4033 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4034 {
4035   PetscErrorCode ierr;
4036 
4037   PetscFunctionBegin;
4038   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4039   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4040   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4041   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4042   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4043   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4044   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4045   PetscFunctionReturn(0);
4046 }
4047 
4048 #undef __FUNCT__
4049 #define __FUNCT__ "MatCreateAIJ"
4050 /*@C
4051    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4052    (the default parallel PETSc format).  For good matrix assembly performance
4053    the user should preallocate the matrix storage by setting the parameters
4054    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4055    performance can be increased by more than a factor of 50.
4056 
4057    Collective on MPI_Comm
4058 
4059    Input Parameters:
4060 +  comm - MPI communicator
4061 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4062            This value should be the same as the local size used in creating the
4063            y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given). For square matrices n is almost always m.
4067 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4068 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4069 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4070            (same value is used for all local rows)
4071 .  d_nnz - array containing the number of nonzeros in the various rows of the
4072            DIAGONAL portion of the local submatrix (possibly different for each row)
4073            or NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
4075 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4076            submatrix (same value is used for all local rows).
4077 -  o_nnz - array containing the number of nonzeros in the various rows of the
4078            OFF-DIAGONAL portion of the local submatrix (possibly different for
4079            each row) or NULL, if o_nz is used to specify the nonzero
4080            structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.
4082 
4083    Output Parameter:
4084 .  A - the matrix
4085 
   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4089 
4090    Notes:
   If the *_nnz parameter is given, then the *_nz parameter is ignored.
4092 
4093    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4094    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4095    storage requirements for this matrix.
4096 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor, then it must be used on all processors that share the object for
   that argument.
4100 
4101    The user MUST specify either the local or global matrix dimensions
4102    (possibly both).
4103 
   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are given by the input parameter 'm', i.e. each processor stores
   values corresponding to an [m x N] submatrix.
4109 
   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2,... are given by the input parameter 'n'.
4113 
   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   owned by the given processor, i.e. the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.
4121 
4122    For a square global matrix we define each processor's diagonal portion
4123    to be its local rows and the corresponding columns (a square submatrix);
4124    each processor's off-diagonal portion encompasses the remainder of the
4125    local matrix (a rectangular submatrix).
4126 
   If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
4128 
4129    When calling this routine with a single process communicator, a matrix of
4130    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4131    type of communicator, use the construction mechanism:
4132      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4133 
4134    By default, this format uses inodes (identical nodes) when possible.
4135    We search for consecutive rows with the same nonzero structure, thereby
4136    reusing matrix information to achieve increased efficiency.
4137 
4138    Options Database Keys:
4139 +  -mat_no_inode  - Do not use inodes
4140 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4141 -  -mat_aij_oneindex - Internally use indexing starting at 1
4142         rather than 0.  Note that when calling MatSetValues(),
4143         the user still MUST index entries starting at 0!
4144 
4145 
4146    Example usage:
4147 
4148    Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4150    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4151    as follows:
4152 
4153 .vb
4154             1  2  0  |  0  3  0  |  0  4
4155     Proc0   0  5  6  |  7  0  0  |  8  0
4156             9  0 10  | 11  0  0  | 12  0
4157     -------------------------------------
4158            13  0 14  | 15 16 17  |  0  0
4159     Proc1   0 18  0  | 19 20 21  |  0  0
4160             0  0  0  | 22 23  0  | 24  0
4161     -------------------------------------
4162     Proc2  25 26 27  |  0  0 28  | 29  0
4163            30  0  0  | 31 32 33  |  0 34
4164 .ve
4165 
4166    This can be represented as a collection of submatrices as:
4167 
4168 .vb
4169       A B C
4170       D E F
4171       G H I
4172 .ve
4173 
4174    Where the submatrices A,B,C are owned by proc0, D,E,F are
4175    owned by proc1, G,H,I are owned by proc2.
4176 
4177    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4178    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4179    The 'M','N' parameters are 8,8, and have the same values on all procs.
4180 
4181    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4182    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4183    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4187 
   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
4194 .vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
4198 .ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.
4203 
   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
4207 .vb
4208      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4209      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4210      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4211 .ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.
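
   As a minimal sketch, proc0 of the example above could create its share of the
   8x8 matrix as follows (each process passes its own local sizes and nnz arrays;
   error checking is omitted):

.vb
     Mat      A;
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve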
4214 
4215    Level: intermediate
4216 
4217 .keywords: matrix, aij, compressed row, sparse, parallel
4218 
4219 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4220           MPIAIJ, MatCreateMPIAIJWithArrays()
4221 @*/
4222 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4223 {
4224   PetscErrorCode ierr;
4225   PetscMPIInt    size;
4226 
4227   PetscFunctionBegin;
4228   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4229   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4230   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4231   if (size > 1) {
4232     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4233     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4234   } else {
4235     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4236     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4237   }
4238   PetscFunctionReturn(0);
4239 }
4240 
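/*
   MatMPIAIJGetSeqAIJ - returns the diagonal block Ad, the off-diagonal block Ao,
   and the colmap array that maps Ao's local column indices to global column
   indices. The returned objects are the matrix's internal data, not copies,
   so the caller must not destroy or modify them.
*/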
4241 #undef __FUNCT__
4242 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4243 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4244 {
4245   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4246 
4247   PetscFunctionBegin;
4248   *Ad     = a->A;
4249   *Ao     = a->B;
4250   *colmap = a->garray;
4251   PetscFunctionReturn(0);
4252 }
4253 
4254 #undef __FUNCT__
4255 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4256 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4257 {
4258   PetscErrorCode ierr;
4259   PetscInt       i;
4260   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4261 
4262   PetscFunctionBegin;
4263   if (coloring->ctype == IS_COLORING_GLOBAL) {
4264     ISColoringValue *allcolors,*colors;
4265     ISColoring      ocoloring;
4266 
4267     /* set coloring for diagonal portion */
4268     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4269 
4270     /* set coloring for off-diagonal portion */
4271     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4272     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4273     for (i=0; i<a->B->cmap->n; i++) {
4274       colors[i] = allcolors[a->garray[i]];
4275     }
4276     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4277     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4278     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4279     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4280   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4281     ISColoringValue *colors;
4282     PetscInt        *larray;
4283     ISColoring      ocoloring;
4284 
4285     /* set coloring for diagonal portion */
4286     ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr);
4287     for (i=0; i<a->A->cmap->n; i++) {
4288       larray[i] = i + A->cmap->rstart;
4289     }
4290     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4291     ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr);
4292     for (i=0; i<a->A->cmap->n; i++) {
4293       colors[i] = coloring->colors[larray[i]];
4294     }
4295     ierr = PetscFree(larray);CHKERRQ(ierr);
4296     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4297     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4298     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4299 
4300     /* set coloring for off-diagonal portion */
4301     ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr);
4302     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4303     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4304     for (i=0; i<a->B->cmap->n; i++) {
4305       colors[i] = coloring->colors[larray[i]];
4306     }
4307     ierr = PetscFree(larray);CHKERRQ(ierr);
4308     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4309     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4310     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4311   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4312   PetscFunctionReturn(0);
4313 }
4314 
4315 #undef __FUNCT__
4316 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4317 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4318 {
4319   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4320   PetscErrorCode ierr;
4321 
4322   PetscFunctionBegin;
4323   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4324   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4325   PetscFunctionReturn(0);
4326 }
4327 
4328 #undef __FUNCT__
4329 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4330 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4331 {
4332   PetscErrorCode ierr;
4333   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4334   PetscInt       *indx;
4335 
4336   PetscFunctionBegin;
  /* This routine will ONLY return an MPIAIJ type matrix */
4338   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4339   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4340   if (n == PETSC_DECIDE) {
4341     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4342   }
4343   /* Check sum(n) = N */
4344   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4345   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4346 
4347   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4348   rstart -= m;
4349 
4350   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4351   for (i=0; i<m; i++) {
4352     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4353     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4354     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4355   }
4356 
4357   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4358   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4359   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4360   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4361   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4362   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4363   PetscFunctionReturn(0);
4364 }
4365 
4366 #undef __FUNCT__
4367 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4368 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4369 {
4370   PetscErrorCode ierr;
4371   PetscInt       m,N,i,rstart,nnz,Ii;
4372   PetscInt       *indx;
4373   PetscScalar    *values;
4374 
4375   PetscFunctionBegin;
4376   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4377   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4378   for (i=0; i<m; i++) {
4379     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4380     Ii   = i + rstart;
4381     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4382     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4383   }
4384   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4385   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4386   PetscFunctionReturn(0);
4387 }
4388 
4389 #undef __FUNCT__
4390 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4391 /*@
4392       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4393                  matrices from each processor
4394 
4395     Collective on MPI_Comm
4396 
4397    Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    inmat - the input sequential matrix (one per process)
4400 .    n - number of local columns (or PETSC_DECIDE)
4401 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4402 
4403    Output Parameter:
4404 .    outmat - the parallel matrix generated
4405 
4406     Level: advanced
4407 
   Notes: The number of columns of the matrix on EACH processor MUST be the same.
4409 
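   A minimal calling sketch (seqA is each process's own SeqAIJ piece; the names
   are illustrative and error checking is omitted):

.vb
     Mat parA;
     MatCreateMPIAIJConcatenateSeqAIJ(comm,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&parA);
.ve
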
4410 @*/
4411 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4412 {
4413   PetscErrorCode ierr;
4414   PetscMPIInt    size;
4415 
4416   PetscFunctionBegin;
4417   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4418   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4419   if (size == 1) {
4420     if (scall == MAT_INITIAL_MATRIX) {
4421       ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr);
4422     } else {
4423       ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4424     }
4425   } else {
4426     if (scall == MAT_INITIAL_MATRIX) {
4427       ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4428     }
4429     ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4430   }
4431   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4432   PetscFunctionReturn(0);
4433 }
4434 
4435 #undef __FUNCT__
4436 #define __FUNCT__ "MatFileSplit"
4437 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4438 {
4439   PetscErrorCode    ierr;
4440   PetscMPIInt       rank;
4441   PetscInt          m,N,i,rstart,nnz;
4442   size_t            len;
4443   const PetscInt    *indx;
4444   PetscViewer       out;
4445   char              *name;
4446   Mat               B;
4447   const PetscScalar *values;
4448 
4449   PetscFunctionBegin;
4450   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4451   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4452   /* Should this be the type of the diagonal block of A? */
4453   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4454   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4455   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4456   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4457   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4458   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4459   for (i=0; i<m; i++) {
4460     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4461     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4462     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4463   }
4464   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4465   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4466 
4467   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4468   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4469   ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr);
4470   sprintf(name,"%s.%d",outfile,rank);
4471   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4472   ierr = PetscFree(name);CHKERRQ(ierr);
4473   ierr = MatView(B,out);CHKERRQ(ierr);
4474   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4475   ierr = MatDestroy(&B);CHKERRQ(ierr);
4476   PetscFunctionReturn(0);
4477 }
4478 
4479 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4480 #undef __FUNCT__
4481 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4482 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4483 {
4484   PetscErrorCode      ierr;
4485   Mat_Merge_SeqsToMPI *merge;
4486   PetscContainer      container;
4487 
4488   PetscFunctionBegin;
4489   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4490   if (container) {
4491     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4492     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4493     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4494     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4495     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4496     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4497     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4498     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4499     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4500     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4501     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4502     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4503     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4504     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4505     ierr = PetscFree(merge);CHKERRQ(ierr);
4506     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4507   }
4508   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4509   PetscFunctionReturn(0);
4510 }
4511 
4512 #include <../src/mat/utils/freespace.h>
4513 #include <petscbt.h>
4514 
4515 #undef __FUNCT__
4516 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4517 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4518 {
4519   PetscErrorCode      ierr;
4520   MPI_Comm            comm;
4521   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4522   PetscMPIInt         size,rank,taga,*len_s;
4523   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4524   PetscInt            proc,m;
4525   PetscInt            **buf_ri,**buf_rj;
4526   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4527   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4528   MPI_Request         *s_waits,*r_waits;
4529   MPI_Status          *status;
4530   MatScalar           *aa=a->a;
4531   MatScalar           **abuf_r,*ba_i;
4532   Mat_Merge_SeqsToMPI *merge;
4533   PetscContainer      container;
4534 
4535   PetscFunctionBegin;
4536   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4537   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4538 
4539   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4540   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4541 
4542   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4543   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4544 
4545   bi     = merge->bi;
4546   bj     = merge->bj;
4547   buf_ri = merge->buf_ri;
4548   buf_rj = merge->buf_rj;
4549 
4550   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4551   owners = merge->rowmap->range;
4552   len_s  = merge->len_s;
4553 
4554   /* send and recv matrix values */
4555   /*-----------------------------*/
4556   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4557   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4558 
4559   ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr);
4560   for (proc=0,k=0; proc<size; proc++) {
4561     if (!len_s[proc]) continue;
4562     i    = owners[proc];
4563     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4564     k++;
4565   }
4566 
4567   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4568   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4569   ierr = PetscFree(status);CHKERRQ(ierr);
4570 
4571   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4572   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4573 
4574   /* insert mat values of mpimat */
4575   /*----------------------------*/
4576   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4577   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4578 
4579   for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of the k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4584   }
4585 
4586   /* set values of ba */
4587   m = merge->rowmap->n;
4588   for (i=0; i<m; i++) {
4589     arow = owners[rank] + i;
4590     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4591     bnzi = bi[i+1] - bi[i];
4592     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4593 
4594     /* add local non-zero vals of this proc's seqmat into ba */
4595     anzi   = ai[arow+1] - ai[arow];
4596     aj     = a->j + ai[arow];
4597     aa     = a->a + ai[arow];
4598     nextaj = 0;
4599     for (j=0; nextaj<anzi; j++) {
4600       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4601         ba_i[j] += aa[nextaj++];
4602       }
4603     }
4604 
4605     /* add received vals into ba */
4606     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4607       /* i-th row */
4608       if (i == *nextrow[k]) {
4609         anzi   = *(nextai[k]+1) - *nextai[k];
4610         aj     = buf_rj[k] + *(nextai[k]);
4611         aa     = abuf_r[k] + *(nextai[k]);
4612         nextaj = 0;
4613         for (j=0; nextaj<anzi; j++) {
4614           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4615             ba_i[j] += aa[nextaj++];
4616           }
4617         }
4618         nextrow[k]++; nextai[k]++;
4619       }
4620     }
4621     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4622   }
4623   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4624   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4625 
4626   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4627   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4628   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4629   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4630   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4631   PetscFunctionReturn(0);
4632 }
4633 
4634 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4635 
4636 #undef __FUNCT__
4637 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4638 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4639 {
4640   PetscErrorCode      ierr;
4641   Mat                 B_mpi;
4642   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4643   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4644   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4645   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4646   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4647   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4648   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4649   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4650   MPI_Status          *status;
4651   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4652   PetscBT             lnkbt;
4653   Mat_Merge_SeqsToMPI *merge;
4654   PetscContainer      container;
4655 
4656   PetscFunctionBegin;
4657   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4658 
4659   /* make sure it is a PETSc comm */
4660   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4661   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4662   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4663 
4664   ierr = PetscNew(&merge);CHKERRQ(ierr);
4665   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4666 
4667   /* determine row ownership */
4668   /*---------------------------------------------------------*/
4669   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4670   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4671   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4672   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4673   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4674   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4675   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4676 
4677   m      = merge->rowmap->n;
4678   owners = merge->rowmap->range;
4679 
4680   /* determine the number of messages to send, their lengths */
4681   /*---------------------------------------------------------*/
4682   len_s = merge->len_s;
4683 
4684   len          = 0; /* length of buf_si[] */
4685   merge->nsend = 0;
4686   for (proc=0; proc<size; proc++) {
4687     len_si[proc] = 0;
4688     if (proc == rank) {
4689       len_s[proc] = 0;
4690     } else {
4691       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4692       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4693     }
4694     if (len_s[proc]) {
4695       merge->nsend++;
4696       nrows = 0;
4697       for (i=owners[proc]; i<owners[proc+1]; i++) {
4698         if (ai[i+1] > ai[i]) nrows++;
4699       }
4700       len_si[proc] = 2*(nrows+1);
4701       len         += len_si[proc];
4702     }
4703   }
4704 
4705   /* determine the number and length of messages to receive for ij-structure */
4706   /*-------------------------------------------------------------------------*/
4707   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4708   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4709 
4710   /* post the Irecv of j-structure */
4711   /*-------------------------------*/
4712   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4713   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4714 
4715   /* post the Isend of j-structure */
4716   /*--------------------------------*/
4717   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4718 
4719   for (proc=0, k=0; proc<size; proc++) {
4720     if (!len_s[proc]) continue;
4721     i    = owners[proc];
4722     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4723     k++;
4724   }
4725 
4726   /* receives and sends of j-structure are complete */
4727   /*------------------------------------------------*/
4728   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4729   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4730 
4731   /* send and recv i-structure */
4732   /*---------------------------*/
4733   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4734   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4735 
4736   ierr   = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr);
4737   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4738   for (proc=0,k=0; proc<size; proc++) {
4739     if (!len_s[proc]) continue;
4740     /* form outgoing message for i-structure:
4741          buf_si[0]:                 nrows to be sent
4742                [1:nrows]:           row index (global)
4743                [nrows+1:2*nrows+1]: i-structure index
4744     */
4745     /*-------------------------------------------*/
4746     nrows       = len_si[proc]/2 - 1;
4747     buf_si_i    = buf_si + nrows+1;
4748     buf_si[0]   = nrows;
4749     buf_si_i[0] = 0;
4750     nrows       = 0;
4751     for (i=owners[proc]; i<owners[proc+1]; i++) {
4752       anzi = ai[i+1] - ai[i];
4753       if (anzi) {
4754         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4755         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4756         nrows++;
4757       }
4758     }
4759     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4760     k++;
4761     buf_si += len_si[proc];
4762   }
4763 
4764   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4765   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4766 
4767   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4768   for (i=0; i<merge->nrecv; i++) {
4769     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4770   }
4771 
4772   ierr = PetscFree(len_si);CHKERRQ(ierr);
4773   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4774   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4775   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4776   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4777   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4778   ierr = PetscFree(status);CHKERRQ(ierr);
4779 
4780   /* compute a local seq matrix in each processor */
4781   /*----------------------------------------------*/
4782   /* allocate bi array and free space for accumulating nonzero column info */
4783   ierr  = PetscMalloc1((m+1),&bi);CHKERRQ(ierr);
4784   bi[0] = 0;
4785 
4786   /* create and initialize a linked list */
4787   nlnk = N+1;
4788   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4789 
4790   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4791   len  = ai[owners[rank+1]] - ai[owners[rank]];
4792   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4793 
4794   current_space = free_space;
4795 
4796   /* determine symbolic info for each local row */
4797   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4798 
4799   for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of the k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4804   }
4805 
4806   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4807   len  = 0;
4808   for (i=0; i<m; i++) {
4809     bnzi = 0;
4810     /* add local non-zero cols of this proc's seqmat into lnk */
4811     arow  = owners[rank] + i;
4812     anzi  = ai[arow+1] - ai[arow];
4813     aj    = a->j + ai[arow];
4814     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4815     bnzi += nlnk;
4816     /* add received col data into lnk */
4817     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4818       if (i == *nextrow[k]) { /* i-th row */
4819         anzi  = *(nextai[k]+1) - *nextai[k];
4820         aj    = buf_rj[k] + *nextai[k];
4821         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4822         bnzi += nlnk;
4823         nextrow[k]++; nextai[k]++;
4824       }
4825     }
4826     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4827 
4828     /* if free space is not available, make more free space */
4829     if (current_space->local_remaining<bnzi) {
4830       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4831       nspacedouble++;
4832     }
4833     /* copy data into free space, then initialize lnk */
4834     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4835     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4836 
4837     current_space->array           += bnzi;
4838     current_space->local_used      += bnzi;
4839     current_space->local_remaining -= bnzi;
4840 
4841     bi[i+1] = bi[i] + bnzi;
4842   }
4843 
4844   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4845 
4846   ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
4847   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4848   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4849 
4850   /* create symbolic parallel matrix B_mpi */
4851   /*---------------------------------------*/
4852   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4853   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4854   if (n==PETSC_DECIDE) {
4855     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4856   } else {
4857     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4858   }
4859   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4860   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4861   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4862   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4863   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4864 
4865   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4866   B_mpi->assembled    = PETSC_FALSE;
4867   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4868   merge->bi           = bi;
4869   merge->bj           = bj;
4870   merge->buf_ri       = buf_ri;
4871   merge->buf_rj       = buf_rj;
4872   merge->coi          = NULL;
4873   merge->coj          = NULL;
4874   merge->owners_co    = NULL;
4875 
4876   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4877 
4878   /* attach the supporting struct to B_mpi for reuse */
4879   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4880   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4881   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4882   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4883   *mpimat = B_mpi;
4884 
4885   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4886   PetscFunctionReturn(0);
4887 }
4888 
4889 #undef __FUNCT__
4890 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4891 /*@C
4892       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4893                  matrices from each processor
4894 
4895     Collective on MPI_Comm
4896 
4897    Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix (one per process)
4900 .    m - number of local rows (or PETSC_DECIDE)
4901 .    n - number of local columns (or PETSC_DECIDE)
4902 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4903 
4904    Output Parameter:
4905 .    mpimat - the parallel matrix generated
4906 
4907     Level: advanced
4908 
   Notes:
     The dimensions of the sequential matrix on each processor MUST be the same.
     The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
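
   A minimal calling sketch (each process contributes its own seqmat; m and n
   may be PETSC_DECIDE; error checking is omitted):

.vb
     Mat mpimat;
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
.ve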
4913 @*/
4914 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4915 {
4916   PetscErrorCode ierr;
4917   PetscMPIInt    size;
4918 
4919   PetscFunctionBegin;
4920   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4921   if (size == 1) {
4922     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4923     if (scall == MAT_INITIAL_MATRIX) {
4924       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4925     } else {
4926       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4927     }
4928     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4929     PetscFunctionReturn(0);
4930   }
4931   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4932   if (scall == MAT_INITIAL_MATRIX) {
4933     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4934   }
4935   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4936   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4937   PetscFunctionReturn(0);
4938 }
4939 
4940 #undef __FUNCT__
4941 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4942 /*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all of its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize()
4946 
4947     Not Collective
4948 
4949    Input Parameters:
4950 +    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4952 
4953    Output Parameter:
4954 .    A_loc - the local sequential matrix generated
4955 
4956     Level: developer
4957 
.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4959 
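   A minimal calling sketch (error checking is omitted; the first call creates
   A_loc, later calls can reuse its storage):

.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     ... A changes values, same nonzero pattern ...
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
.ve
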
4960 @*/
4961 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4962 {
4963   PetscErrorCode ierr;
4964   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4965   Mat_SeqAIJ     *mat,*a,*b;
4966   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4967   MatScalar      *aa,*ba,*cam;
4968   PetscScalar    *ca;
4969   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4970   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4971   PetscBool      match;
4972 
4973   PetscFunctionBegin;
4974   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4975   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4976   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4977   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4978   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4979   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4980   aa = a->a; ba = b->a;
4981   if (scall == MAT_INITIAL_MATRIX) {
4982     ierr  = PetscMalloc1((1+am),&ci);CHKERRQ(ierr);
4983     ci[0] = 0;
4984     for (i=0; i<am; i++) {
4985       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4986     }
4987     ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr);
4988     ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr);
4989     k    = 0;
4990     for (i=0; i<am; i++) {
4991       ncols_o = bi[i+1] - bi[i];
4992       ncols_d = ai[i+1] - ai[i];
4993       /* off-diagonal portion of A */
4994       for (jo=0; jo<ncols_o; jo++) {
4995         col = cmap[*bj];
4996         if (col >= cstart) break;
4997         cj[k]   = col; bj++;
4998         ca[k++] = *ba++;
4999       }
5000       /* diagonal portion of A */
5001       for (j=0; j<ncols_d; j++) {
5002         cj[k]   = cstart + *aj++;
5003         ca[k++] = *aa++;
5004       }
5005       /* off-diagonal portion of A */
5006       for (j=jo; j<ncols_o; j++) {
5007         cj[k]   = cmap[*bj++];
5008         ca[k++] = *ba++;
5009       }
5010     }
5011     /* put together the new matrix */
5012     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5013     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5014     /* Since these are PETSc arrays, change flags to free them as necessary. */
5015     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5016     mat->free_a  = PETSC_TRUE;
5017     mat->free_ij = PETSC_TRUE;
5018     mat->nonew   = 0;
5019   } else if (scall == MAT_REUSE_MATRIX) {
5020     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5021     ci = mat->i; cj = mat->j; cam = mat->a;
5022     for (i=0; i<am; i++) {
5023       /* off-diagonal portion of A */
5024       ncols_o = bi[i+1] - bi[i];
5025       for (jo=0; jo<ncols_o; jo++) {
5026         col = cmap[*bj];
5027         if (col >= cstart) break;
5028         *cam++ = *ba++; bj++;
5029       }
5030       /* diagonal portion of A */
5031       ncols_d = ai[i+1] - ai[i];
5032       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5033       /* off-diagonal portion of A */
5034       for (j=jo; j<ncols_o; j++) {
5035         *cam++ = *ba++; bj++;
5036       }
5037     }
5038   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5039   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5040   PetscFunctionReturn(0);
5041 }
5042 
5043 #undef __FUNCT__
5044 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5045 /*@C
5046      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5047 
5048     Not Collective
5049 
5050    Input Parameters:
5051 +    A - the matrix
5052 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5053 -    row, col - index sets of rows and columns to extract (or NULL)
5054 
5055    Output Parameter:
5056 .    A_loc - the local sequential matrix generated
5057 
5058     Level: developer
5059 
5060 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5061 
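   A minimal calling sketch extracting all local rows and the nonzero columns
   (passing NULL for both index sets; error checking is omitted):

.vb
     Mat A_loc;
     MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);
.ve
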
5062 @*/
5063 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5064 {
5065   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5066   PetscErrorCode ierr;
5067   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5068   IS             isrowa,iscola;
5069   Mat            *aloc;
5070   PetscBool      match;
5071 
5072   PetscFunctionBegin;
5073   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5074   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5075   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5076   if (!row) {
5077     start = A->rmap->rstart; end = A->rmap->rend;
5078     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5079   } else {
5080     isrowa = *row;
5081   }
5082   if (!col) {
5083     start = A->cmap->rstart;
5084     cmap  = a->garray;
5085     nzA   = a->A->cmap->n;
5086     nzB   = a->B->cmap->n;
5087     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5088     ncols = 0;
5089     for (i=0; i<nzB; i++) {
5090       if (cmap[i] < start) idx[ncols++] = cmap[i];
5091       else break;
5092     }
5093     imark = i;
5094     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5095     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5096     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5097   } else {
5098     iscola = *col;
5099   }
5100   if (scall != MAT_INITIAL_MATRIX) {
5101     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5102     aloc[0] = *A_loc;
5103   }
5104   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5105   *A_loc = aloc[0];
5106   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5107   if (!row) {
5108     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5109   }
5110   if (!col) {
5111     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5112   }
5113   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5114   PetscFunctionReturn(0);
5115 }
5116 
5117 #undef __FUNCT__
5118 #define __FUNCT__ "MatGetBrowsOfAcols"
5119 /*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
5121 
5122     Collective on Mat
5123 
5124    Input Parameters:
5125 +    A,B - the matrices in mpiaij format
5126 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5127 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5128 
5129    Output Parameter:
5130 +    rowb, colb - index sets of rows and columns of B to extract
5131 -    B_seq - the sequential matrix generated
5132 
5133     Level: developer
5134 
5135 @*/
5136 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5137 {
5138   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5139   PetscErrorCode ierr;
5140   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5141   IS             isrowb,iscolb;
5142   Mat            *bseq=NULL;
5143 
5144   PetscFunctionBegin;
5145   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5146     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5147   }
5148   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5149 
5150   if (scall == MAT_INITIAL_MATRIX) {
5151     start = A->cmap->rstart;
5152     cmap  = a->garray;
5153     nzA   = a->A->cmap->n;
5154     nzB   = a->B->cmap->n;
5155     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5156     ncols = 0;
5157     for (i=0; i<nzB; i++) {  /* row < local row index */
5158       if (cmap[i] < start) idx[ncols++] = cmap[i];
5159       else break;
5160     }
5161     imark = i;
5162     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5163     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5164     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5165     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5166   } else {
5167     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5168     isrowb  = *rowb; iscolb = *colb;
5169     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5170     bseq[0] = *B_seq;
5171   }
5172   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5173   *B_seq = bseq[0];
5174   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5175   if (!rowb) {
5176     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5177   } else {
5178     *rowb = isrowb;
5179   }
5180   if (!colb) {
5181     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5182   } else {
5183     *colb = iscolb;
5184   }
5185   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5186   PetscFunctionReturn(0);
5187 }
5188 
5189 #undef __FUNCT__
5190 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5191 /*
5192     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero
5193     columns of the OFF-DIAGONAL portion of the local part of A
5194 
5195     Collective on Mat
5196 
5197    Input Parameters:
5198 +    A,B - the matrices in mpiaij format
5199 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5200 
5201    Output Parameters:
5202 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5203 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5204 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5205 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5206 
5207     Level: developer
5208 
5209 */
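/* Editorial note (summary of the exchange below, not additional code): the routine runs
   three phases over the same VecScatter communication pattern:
     1) i-array: each process sends the lengths of the rows of B it owns that other
        processes need, so receivers can size b_othi and the j/a buffers;
     2) j-array: the global column indices of those rows are packed into bufj and sent;
     3) a-array: the numerical values are packed into bufa and sent; this is the only
        phase repeated under MAT_REUSE_MATRIX, using the saved sstartsj/rstartsj/bufa. */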
5210 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5211 {
5212   VecScatter_MPI_General *gen_to,*gen_from;
5213   PetscErrorCode         ierr;
5214   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5215   Mat_SeqAIJ             *b_oth;
5216   VecScatter             ctx =a->Mvctx;
5217   MPI_Comm               comm;
5218   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5219   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5220   PetscScalar            *rvalues,*svalues;
5221   MatScalar              *b_otha,*bufa,*bufA;
5222   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5223   MPI_Request            *rwaits = NULL,*swaits = NULL;
5224   MPI_Status             *sstatus,rstatus;
5225   PetscMPIInt            jj;
5226   PetscInt               *cols,sbs,rbs;
5227   PetscScalar            *vals;
5228 
5229   PetscFunctionBegin;
5230   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5231   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5232     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5233   }
5234   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5235   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5236 
5237   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5238   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5239   rvalues  = gen_from->values; /* buffer reused below to hold the lengths of the incoming rows */
5240   svalues  = gen_to->values;   /* buffer reused below to hold the lengths of the outgoing rows */
5241   nrecvs   = gen_from->n;
5242   nsends   = gen_to->n;
5243 
5244   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5245   srow    = gen_to->indices;    /* local row index to be sent */
5246   sstarts = gen_to->starts;
5247   sprocs  = gen_to->procs;
5248   sstatus = gen_to->sstatus;
5249   sbs     = gen_to->bs;
5250   rstarts = gen_from->starts;
5251   rprocs  = gen_from->procs;
5252   rbs     = gen_from->bs;
5253 
5254   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5255   if (scall == MAT_INITIAL_MATRIX) {
5256     /* i-array */
5257     /*---------*/
5258     /*  post receives */
5259     for (i=0; i<nrecvs; i++) {
5260       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5261       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5262       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5263     }
5264 
5265     /* pack the outgoing message */
5266     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5267 
5268     sstartsj[0] = 0;
5269     rstartsj[0] = 0;
5270     len         = 0; /* total length of j or a array to be sent */
5271     k           = 0;
5272     for (i=0; i<nsends; i++) {
5273       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5274       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5275       for (j=0; j<nrows; j++) {
5276         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5277         for (l=0; l<sbs; l++) {
5278           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5279 
5280           rowlen[j*sbs+l] = ncols;
5281 
5282           len += ncols;
5283           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5284         }
5285         k++;
5286       }
5287       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5288 
5289       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5290     }
5291     /* recvs and sends of i-array are completed */
5292     i = nrecvs;
5293     while (i--) {
5294       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5295     }
5296     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5297 
5298     /* allocate buffers for sending j and a arrays */
5299     ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
5300     ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);
5301 
5302     /* create i-array of B_oth */
5303     ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);
5304 
5305     b_othi[0] = 0;
5306     len       = 0; /* total length of j or a array to be received */
5307     k         = 0;
5308     for (i=0; i<nrecvs; i++) {
5309       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5310       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5311       for (j=0; j<nrows; j++) {
5312         b_othi[k+1] = b_othi[k] + rowlen[j];
5313         len        += rowlen[j]; k++;
5314       }
5315       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5316     }
5317 
5318     /* allocate space for j and a arrays of B_oth */
5319     ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
5320     ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);
5321 
5322     /* j-array */
5323     /*---------*/
5324     /*  post receives of j-array */
5325     for (i=0; i<nrecvs; i++) {
5326       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5327       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5328     }
5329 
5330     /* pack the outgoing message j-array */
5331     k = 0;
5332     for (i=0; i<nsends; i++) {
5333       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5334       bufJ  = bufj+sstartsj[i];
5335       for (j=0; j<nrows; j++) {
5336         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5337         for (ll=0; ll<sbs; ll++) {
5338           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5339           for (l=0; l<ncols; l++) {
5340             *bufJ++ = cols[l];
5341           }
5342           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5343         }
5344       }
5345       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5346     }
5347 
5348     /* recvs and sends of j-array are completed */
5349     i = nrecvs;
5350     while (i--) {
5351       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5352     }
5353     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5354   } else if (scall == MAT_REUSE_MATRIX) {
5355     sstartsj = *startsj_s;
5356     rstartsj = *startsj_r;
5357     bufa     = *bufa_ptr;
5358     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5359     b_otha   = b_oth->a;
5360   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5361 
5362   /* a-array */
5363   /*---------*/
5364   /*  post receives of a-array */
5365   for (i=0; i<nrecvs; i++) {
5366     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5367     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5368   }
5369 
5370   /* pack the outgoing message a-array */
5371   k = 0;
5372   for (i=0; i<nsends; i++) {
5373     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5374     bufA  = bufa+sstartsj[i];
5375     for (j=0; j<nrows; j++) {
5376       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5377       for (ll=0; ll<sbs; ll++) {
5378         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5379         for (l=0; l<ncols; l++) {
5380           *bufA++ = vals[l];
5381         }
5382         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5383       }
5384     }
5385     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5386   }
5387   /* recvs and sends of a-array are completed */
5388   i = nrecvs;
5389   while (i--) {
5390     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5391   }
5392   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5393   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5394 
5395   if (scall == MAT_INITIAL_MATRIX) {
5396     /* put together the new matrix */
5397     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5398 
5399     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5400     /* Since these are PETSc arrays, change flags to free them as necessary. */
5401     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5402     b_oth->free_a  = PETSC_TRUE;
5403     b_oth->free_ij = PETSC_TRUE;
5404     b_oth->nonew   = 0;
5405 
5406     ierr = PetscFree(bufj);CHKERRQ(ierr);
5407     if (!startsj_s || !bufa_ptr) {
5408       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5409       ierr = PetscFree(bufa);CHKERRQ(ierr);
5410     } else {
5411       *startsj_s = sstartsj;
5412       *startsj_r = rstartsj;
5413       *bufa_ptr  = bufa;
5414     }
5415   }
5416   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5417   PetscFunctionReturn(0);
5418 }
5419 
5420 #undef __FUNCT__
5421 #define __FUNCT__ "MatGetCommunicationStructs"
5422 /*@C
5423   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5424 
5425   Not Collective
5426 
5427   Input Parameter:
5428 . A - The matrix in mpiaij format
5429 
5430   Output Parameters:
5431 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5432 . colmap - A map from global column index to local index into lvec
5433 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5434 
5435   Level: developer
5436 
5437 @*/
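/* Editorial note: a minimal usage sketch (not part of the original source), assuming an
   assembled MPIAIJ matrix A; the returned objects are owned by the matrix and must not be
   destroyed by the caller:

       Vec        lvec;
       VecScatter Mvctx;
   #if defined(PETSC_USE_CTABLE)
       PetscTable colmap;
   #else
       PetscInt   *colmap;
   #endif
       ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/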
5438 #if defined(PETSC_USE_CTABLE)
5439 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5440 #else
5441 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5442 #endif
5443 {
5444   Mat_MPIAIJ *a;
5445 
5446   PetscFunctionBegin;
5447   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5448   PetscValidPointer(lvec, 2);
5449   PetscValidPointer(colmap, 3);
5450   PetscValidPointer(multScatter, 4);
5451   a = (Mat_MPIAIJ*) A->data;
5452   if (lvec) *lvec = a->lvec;
5453   if (colmap) *colmap = a->colmap;
5454   if (multScatter) *multScatter = a->Mvctx;
5455   PetscFunctionReturn(0);
5456 }
5457 
5458 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5459 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5460 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5461 
5462 #undef __FUNCT__
5463 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5464 /*
5465     Computes C = A*B as (B'*A')', since a direct MPIDense*MPIAIJ product is not available; the reversed product B'*A' is an MPIAIJ*MPIDense multiply, which is supported
5466 
5467                n                       p                          p
5468         (              )       (              )         (                  )
5469       m (      A       )  *  n (       B      )   =   m (         C        )
5470         (              )       (              )         (                  )
5471 
5472 */
5473 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5474 {
5475   PetscErrorCode ierr;
5476   Mat            At,Bt,Ct;
5477 
5478   PetscFunctionBegin;
5479   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5480   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5481   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5482   ierr = MatDestroy(&At);CHKERRQ(ierr);
5483   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5484   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5485   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5486   PetscFunctionReturn(0);
5487 }
5488 
5489 #undef __FUNCT__
5490 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5491 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5492 {
5493   PetscErrorCode ierr;
5494   PetscInt       m=A->rmap->n,n=B->cmap->n;
5495   Mat            Cmat;
5496 
5497   PetscFunctionBegin;
5498   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5499   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5500   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5501   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5502   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5503   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5504   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5505   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5506 
5507   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5508 
5509   *C = Cmat;
5510   PetscFunctionReturn(0);
5511 }
5512 
5513 /* ----------------------------------------------------------------*/
5514 #undef __FUNCT__
5515 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5516 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5517 {
5518   PetscErrorCode ierr;
5519 
5520   PetscFunctionBegin;
5521   if (scall == MAT_INITIAL_MATRIX) {
5522     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5523     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5524     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5525   }
5526   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5527   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5528   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5529   PetscFunctionReturn(0);
5530 }
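/* Editorial note: a usage sketch (not part of the original source); callers reach the
   routines above through the generic interface, assuming A is MPIDENSE and B is MPIAIJ
   with A->cmap->n == B->rmap->n:

       Mat C;
       ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
       ... change numerical values of A and B ...
       ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
       ierr = MatDestroy(&C);CHKERRQ(ierr);
*/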
5531 
5532 #if defined(PETSC_HAVE_MUMPS)
5533 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5534 #endif
5535 #if defined(PETSC_HAVE_PASTIX)
5536 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5537 #endif
5538 #if defined(PETSC_HAVE_SUPERLU_DIST)
5539 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5540 #endif
5541 #if defined(PETSC_HAVE_CLIQUE)
5542 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5543 #endif
5544 
5545 /*MC
5546    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5547 
5548    Options Database Keys:
5549 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5550 
5551   Level: beginner
5552 
5553 .seealso: MatCreateAIJ()
5554 M*/
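/* Editorial note: a minimal creation sketch (not part of the original source); m, n, M, N
   and comm are assumed to be provided by the caller, and 5 nonzeros per row are
   preallocated in both the diagonal and off-diagonal blocks:

       Mat A;
       ierr = MatCreate(comm,&A);CHKERRQ(ierr);
       ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
       ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
       ierr = MatMPIAIJSetPreallocation(A,5,NULL,5,NULL);CHKERRQ(ierr);
*/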
5555 
5556 #undef __FUNCT__
5557 #define __FUNCT__ "MatCreate_MPIAIJ"
5558 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5559 {
5560   Mat_MPIAIJ     *b;
5561   PetscErrorCode ierr;
5562   PetscMPIInt    size;
5563 
5564   PetscFunctionBegin;
5565   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5566 
5567   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5568   B->data       = (void*)b;
5569   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5570   B->assembled  = PETSC_FALSE;
5571   B->insertmode = NOT_SET_VALUES;
5572   b->size       = size;
5573 
5574   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5575 
5576   /* build cache for off array entries formed */
5577   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5578 
5579   b->donotstash  = PETSC_FALSE;
5580   b->colmap      = 0;
5581   b->garray      = 0;
5582   b->roworiented = PETSC_TRUE;
5583 
5584   /* stuff used for matrix vector multiply */
5585   b->lvec  = NULL;
5586   b->Mvctx = NULL;
5587 
5588   /* stuff for MatGetRow() */
5589   b->rowindices   = 0;
5590   b->rowvalues    = 0;
5591   b->getrowactive = PETSC_FALSE;
5592 
5593   /* flexible pointer used in CUSP/CUSPARSE classes */
5594   b->spptr = NULL;
5595 
5596 #if defined(PETSC_HAVE_MUMPS)
5597   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5598 #endif
5599 #if defined(PETSC_HAVE_PASTIX)
5600   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5601 #endif
5602 #if defined(PETSC_HAVE_SUPERLU_DIST)
5603   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5604 #endif
5605 #if defined(PETSC_HAVE_CLIQUE)
5606   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5607 #endif
5608   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5609   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5610   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5611   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5612   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5613   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5614   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5615   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5616   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5617   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5618   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5619   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5620   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5621   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5622   PetscFunctionReturn(0);
5623 }
5624 
5625 #undef __FUNCT__
5626 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5627 /*@
5628      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5629          and "off-diagonal" part of the matrix in CSR format.
5630 
5631    Collective on MPI_Comm
5632 
5633    Input Parameters:
5634 +  comm - MPI communicator
5635 .  m - number of local rows (cannot be PETSC_DECIDE)
5636 .  n - number of local columns; this must be the same as the local size used in creating
5637        the x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5638        calculated if N is given). For square matrices n is almost always m.
5639 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5640 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5641 .   i - row indices for "diagonal" portion of matrix
5642 .   j - column indices
5643 .   a - matrix values
5644 .   oi - row indices for "off-diagonal" portion of matrix
5645 .   oj - column indices
5646 -   oa - matrix values
5647 
5648    Output Parameter:
5649 .   mat - the matrix
5650 
5651    Level: advanced
5652 
5653    Notes:
5654        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5655        must free the arrays once the matrix has been destroyed and not before.
5656 
5657        The i and j indices are 0 based
5658 
5659        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5660 
5661        This sets local rows and cannot be used to set off-processor values.
5662 
5663        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5664        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5665        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5666        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5667        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5668        communication if it is known that only local entries will be set.
5669 
5670 .keywords: matrix, aij, compressed row, sparse, parallel
5671 
5672 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5673           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5674 @*/
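/* Editorial note: a minimal sketch (not part of the original source) for two MPI ranks,
   each owning 2 rows and 2 columns of a 4x4 matrix; the diagonal-block column indices j
   are local (0..n-1), while the off-diagonal indices oj are global, as implied by the
   call below that creates B with (*mat)->cmap->N columns:

       On rank 0 (rows 0-1; diagonal entries (0,0),(1,1); off-diagonal entry (0,2)):
         PetscInt    i[]  = {0,1,2}, j[]  = {0,1};  PetscScalar a[]  = {1.0,2.0};
         PetscInt    oi[] = {0,1,1}, oj[] = {2};    PetscScalar oa[] = {5.0};
       On rank 1 (rows 2-3; diagonal entries (2,2),(3,3); off-diagonal entry (3,0)):
         PetscInt    i[]  = {0,1,2}, j[]  = {0,1};  PetscScalar a[]  = {3.0,4.0};
         PetscInt    oi[] = {0,0,1}, oj[] = {0};    PetscScalar oa[] = {6.0};
       Then on both ranks:
         Mat A;
         ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
       The arrays must remain allocated until after MatDestroy(&A).
*/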
5675 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5676 {
5677   PetscErrorCode ierr;
5678   Mat_MPIAIJ     *maij;
5679 
5680   PetscFunctionBegin;
5681   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5682   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5683   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5684   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5685   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5686   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5687   maij = (Mat_MPIAIJ*) (*mat)->data;
5688 
5689   (*mat)->preallocated = PETSC_TRUE;
5690 
5691   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5692   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5693 
5694   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5695   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5696 
5697   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5698   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5699   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5700   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5701 
5702   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5703   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5704   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5705   PetscFunctionReturn(0);
5706 }
5707 
5708 /*
5709     Special version for direct calls from Fortran
5710 */
5711 #include <petsc-private/fortranimpl.h>
5712 
5713 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5714 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5715 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5716 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5717 #endif
5718 
5719 /* Redefine these macros so that they can be used in a function returning void */
5720 #undef CHKERRQ
5721 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5722 #undef SETERRQ2
5723 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5724 #undef SETERRQ3
5725 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5726 #undef SETERRQ
5727 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5728 
5729 #undef __FUNCT__
5730 #define __FUNCT__ "matsetvaluesmpiaij_"
5731 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5732 {
5733   Mat            mat  = *mmat;
5734   PetscInt       m    = *mm, n = *mn;
5735   InsertMode     addv = *maddv;
5736   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5737   PetscScalar    value;
5738   PetscErrorCode ierr;
5739 
5740   MatCheckPreallocated(mat,1);
5741   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5742 
5743 #if defined(PETSC_USE_DEBUG)
5744   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5745 #endif
5746   {
5747     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5748     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5749     PetscBool roworiented = aij->roworiented;
5750 
5751     /* Some Variables required in the macro */
5752     Mat        A                 = aij->A;
5753     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5754     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5755     MatScalar  *aa               = a->a;
5756     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5757     Mat        B                 = aij->B;
5758     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5759     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5760     MatScalar  *ba               = b->a;
5761 
5762     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5763     PetscInt  nonew = a->nonew;
5764     MatScalar *ap1,*ap2;
5765 
5766     PetscFunctionBegin;
5767     for (i=0; i<m; i++) {
5768       if (im[i] < 0) continue;
5769 #if defined(PETSC_USE_DEBUG)
5770       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5771 #endif
5772       if (im[i] >= rstart && im[i] < rend) {
5773         row      = im[i] - rstart;
5774         lastcol1 = -1;
5775         rp1      = aj + ai[row];
5776         ap1      = aa + ai[row];
5777         rmax1    = aimax[row];
5778         nrow1    = ailen[row];
5779         low1     = 0;
5780         high1    = nrow1;
5781         lastcol2 = -1;
5782         rp2      = bj + bi[row];
5783         ap2      = ba + bi[row];
5784         rmax2    = bimax[row];
5785         nrow2    = bilen[row];
5786         low2     = 0;
5787         high2    = nrow2;
5788 
5789         for (j=0; j<n; j++) {
5790           if (roworiented) value = v[i*n+j];
5791           else value = v[i+j*m];
5792           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5793           if (in[j] >= cstart && in[j] < cend) {
5794             col = in[j] - cstart;
5795             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5796           } else if (in[j] < 0) continue;
5797 #if defined(PETSC_USE_DEBUG)
5798           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5799 #endif
5800           else {
5801             if (mat->was_assembled) {
5802               if (!aij->colmap) {
5803                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5804               }
5805 #if defined(PETSC_USE_CTABLE)
5806               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5807               col--;
5808 #else
5809               col = aij->colmap[in[j]] - 1;
5810 #endif
5811               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5812                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5813                 col  =  in[j];
5814                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5815                 B     = aij->B;
5816                 b     = (Mat_SeqAIJ*)B->data;
5817                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5818                 rp2   = bj + bi[row];
5819                 ap2   = ba + bi[row];
5820                 rmax2 = bimax[row];
5821                 nrow2 = bilen[row];
5822                 low2  = 0;
5823                 high2 = nrow2;
5824                 bm    = aij->B->rmap->n;
5825                 ba    = b->a;
5826               }
5827             } else col = in[j];
5828             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5829           }
5830         }
5831       } else if (!aij->donotstash) {
5832         if (roworiented) {
5833           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5834         } else {
5835           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5836         }
5837       }
5838     }
5839   }
5840   PetscFunctionReturnVoid();
5841 }
5842 
5843