/* xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision bfd264e73d4f53735ee2d13f3be48efe21fdc128) */

#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc-private/vecimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to use inodes when enough of them exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/
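
/*
   Example (a minimal sketch, not part of this file): creating an AIJ matrix that works on any
   number of processes by calling both preallocation routines, as recommended above.  The global
   size (100) and the per-row nonzero estimates (5 and 2) are assumptions for illustration only.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);        used when size == 1
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr); used when size > 1
*/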

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
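
/*
   Example (a sketch): the CRL variant is normally selected at run time rather than hardwired in
   code, e.g.

     ./myapp -mat_type aijcrl

   provided the application calls MatSetFromOptions() on the matrix.
*/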

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
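
/*
   Example (a sketch, assuming the public MatFindNonzeroRows() interface that dispatches to the
   routine above): obtaining the locally owned rows that contain at least one nonzero value.
   Note the implementation above leaves *keptrows NULL when no process has a zero row.

     IS keptrows;
     ierr = MatFindNonzeroRows(A,&keptrows);CHKERRQ(ierr);
     if (keptrows) {
       ierr = ISView(keptrows,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
       ierr = ISDestroy(&keptrows);CHKERRQ(ierr);
     }
*/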

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
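
/*
   Example (a sketch): computing the 2-norm of every global column with the routine above, through
   the public MatGetColumnNorms() interface.  The norms array must have length equal to the global
   number of columns on every process, since the allreduce fills it redundantly everywhere.

     PetscReal *norms;
     PetscInt  N;
     ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc1(N,&norms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
     ierr = PetscFree(norms);CHKERRQ(ierr);
*/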

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable
  at a slightly higher hash table cost; without it it is not scalable
  (each process has an order-N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
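
/*
   Sketch of how the colmap built above is consulted later in this file (see MatSetValues_MPIAIJ()
   and MatGetValues_MPIAIJ()) to turn a global column number gcol into a local column index of B,
   with a negative result meaning not present:

   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&col);CHKERRQ(ierr);
     col--;
   #else
     col = aij->colmap[gcol] - 1;
   #endif

   The shift by one is needed because both the table and the plain array use 0 for "not present".
*/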

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value;   \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col;                                       \
    while (high2-low2 > 5) {                              \
      t = (low2+high2)/2;                                 \
      if (rp2[t] > col) high2 = t;                        \
      else             low2  = t;                         \
    }                                                     \
    for (_i=low2; _i<high2; _i++) {                       \
      if (rp2[_i] > col) break;                           \
      if (rp2[_i] == col) {                               \
        if (addv == ADD_VALUES) ap2[_i] += value;         \
        else                    ap2[_i] = value;          \
        goto b_noinsert;                                  \
      }                                                   \
    }                                                     \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;                    \
    /* shift up all the later entries in this row */      \
    for (ii=N; ii>=_i; ii--) {                            \
      rp2[ii+1] = rp2[ii];                                \
      ap2[ii+1] = ap2[ii];                                \
    }                                                     \
    rp2[_i] = col;                                        \
    ap2[_i] = value;                                      \
    B->nonzerostate++;                                    \
    b_noinsert: ;                                         \
    bilen[row] = nrow2;                                   \
  }

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
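
/*
   Sketch of the value layout MatSetValuesRow_MPIAIJ() above expects: v must hold the complete
   sparse row with entries sorted by global column, so with owned column range [cstart,cend) it is

     v = [ cols < cstart | cstart <= cols < cend | cols >= cend ]

   and the three PetscMemcpy() calls copy the pieces into B (left part), A, and B (right part).
*/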

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A                 = aij->A;
  Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa               = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B                 = aij->B;
  Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba               = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
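
/*
   Example (a sketch): typical use of the routine above through the public MatSetValues()
   interface, inserting one 2x2 block of assumed values; off-process entries are stashed and
   communicated by the assembly pair.

     PetscInt    rows[2] = {0,1},cols[2] = {0,1};
     PetscScalar vals[4] = {1.0,2.0,3.0,4.0};
     ierr = MatSetValues(A,2,rows,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/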

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
  PetscInt      *owners = A->rmap->range;
  PetscInt       n      = A->rmap->n;
  PetscSF        sf;
  PetscInt      *lrows;
  PetscSFNode   *rrows;
  PetscInt       r, p = 0, len = 0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    if (A->nooffproczerorows) {
      if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
      lrows[len++] = idx - owners[p];
    } else {
      rrows[r].rank  = p;
      rrows[r].index = rows[r] - owners[p];
    }
  }
  if (!A->nooffproczerorows) {
    ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
    /* Collect flags for rows to be zeroed */
    ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
    /* Compress and put in row numbers */
    for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  }
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }
  /* Must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
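
/*
   Example (a sketch): zeroing two assumed global rows through the public MatZeroRows()
   interface, placing 1.0 on their diagonal and adjusting the right-hand side b so those rows of
   the solution keep the values given in x.

     PetscInt zrows[2] = {0,5};
     ierr = MatZeroRows(A,2,zrows,1.0,x,b);CHKERRQ(ierr);
*/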

#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
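
/*
   The multiply above overlaps communication with computation: the scatter of the needed
   off-process entries of xx into a->lvec is started, the diagonal-block product is computed
   locally, the scatter is completed, and the off-diagonal block is applied to the gathered
   ghost values.  In matrix terms, with x_ghost denoting the gathered remote entries,

     yy = A_diag * xx_local + B_off * x_ghost
*/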

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
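
/*
   Example (a sketch): testing through the public MatIsTranspose() interface whether Bmat equals
   the transpose of Amat to a tolerance.

     PetscBool flg;
     ierr = MatIsTranspose(A,B,1.e-10,&flg);CHKERRQ(ierr);
*/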

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
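
/*
   Example (a sketch): extracting the diagonal into a vector whose parallel layout matches the
   rows of the matrix.

     Vec d;
     ierr = MatGetVecs(A,NULL,&d);CHKERRQ(ierr);
     ierr = MatGetDiagonal(A,d);CHKERRQ(ierr);
     ierr = VecDestroy(&d);CHKERRQ(ierr);
*/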

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_Redundant"
PetscErrorCode MatDestroy_Redundant(Mat_Redundant **redundant)
{
  PetscErrorCode ierr;
  Mat_Redundant  *redund = *redundant;
  PetscInt       i;

  PetscFunctionBegin;
  *redundant = NULL;
  if (redund) {
    if (redund->matseq) { /* via MatGetSubMatrices() */
      ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
      ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
      ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
      ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
    } else {
      ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
      ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
      ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
      for (i=0; i<redund->nrecvs; i++) {
        ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
        ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
      }
      ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
    }

    if (redund->psubcomm) {
      ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
    }
    ierr = PetscFree(redund);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatDestroy_Redundant(&aij->redundant);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs space for what the largest process needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1293       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1294       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1295       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1296     }
1297     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1298   } else {
1299     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1300     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1301     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1302     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1303   }
1304   ierr = PetscFree(column_values);CHKERRQ(ierr);
1305 
1306   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1307   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1308   PetscFunctionReturn(0);
1309 }
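/* The binary file written above therefore contains, in this order: the 4-entry
   PetscInt header (header[3] being the global nonzero count), every rank's row
   lengths in rank order, all global column indices, and finally all values. */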
1310 
1311 #include <petscdraw.h>
1312 #undef __FUNCT__
1313 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1314 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1315 {
1316   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1317   PetscErrorCode    ierr;
1318   PetscMPIInt       rank = aij->rank,size = aij->size;
1319   PetscBool         isdraw,iascii,isbinary;
1320   PetscViewer       sviewer;
1321   PetscViewerFormat format;
1322 
1323   PetscFunctionBegin;
1324   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1325   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1326   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1327   if (iascii) {
1328     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1329     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1330       MatInfo   info;
1331       PetscBool inodes;
1332 
1333       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1334       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1335       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1336       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1337       if (!inodes) {
1338         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1339                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1340       } else {
1341         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1342                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1343       }
1344       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1345       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1346       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1347       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1348       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1349       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1350       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1351       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1352       PetscFunctionReturn(0);
1353     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1354       PetscInt inodecount,inodelimit,*inodes;
1355       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1356       if (inodes) {
1357         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1358       } else {
1359         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1360       }
1361       PetscFunctionReturn(0);
1362     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1363       PetscFunctionReturn(0);
1364     }
1365   } else if (isbinary) {
1366     if (size == 1) {
1367       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1368       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1369     } else {
1370       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1371     }
1372     PetscFunctionReturn(0);
1373   } else if (isdraw) {
1374     PetscDraw draw;
1375     PetscBool isnull;
1376     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1377     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1378   }
1379 
1380   {
1381     /* assemble the entire matrix onto first processor. */
1382     Mat        A;
1383     Mat_SeqAIJ *Aloc;
1384     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1385     MatScalar  *a;
1386 
1387     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1388     if (!rank) {
1389       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1390     } else {
1391       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1392     }
1393     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1394     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1395     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1396     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1397     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1398 
1399     /* copy over the A part */
1400     Aloc = (Mat_SeqAIJ*)aij->A->data;
1401     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1402     row  = mat->rmap->rstart;
1403     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1404     for (i=0; i<m; i++) {
1405       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1406       row++;
1407       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1408     }
1409     aj = Aloc->j;
1410     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1411 
1412     /* copy over the B part */
1413     Aloc = (Mat_SeqAIJ*)aij->B->data;
1414     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1415     row  = mat->rmap->rstart;
1416     ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr);
1417     ct   = cols;
1418     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1419     for (i=0; i<m; i++) {
1420       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1421       row++;
1422       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1423     }
1424     ierr = PetscFree(ct);CHKERRQ(ierr);
1425     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1426     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1427     /*
1428        Everyone has to call to draw the matrix since the graphics waits are
1429        synchronized across all processors that share the PetscDraw object
1430     */
1431     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1432     if (!rank) {
1433       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1434     }
1435     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1436     ierr = MatDestroy(&A);CHKERRQ(ierr);
1437   }
1438   PetscFunctionReturn(0);
1439 }
1440 
1441 #undef __FUNCT__
1442 #define __FUNCT__ "MatView_MPIAIJ"
1443 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1444 {
1445   PetscErrorCode ierr;
1446   PetscBool      iascii,isdraw,issocket,isbinary;
1447 
1448   PetscFunctionBegin;
1449   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1450   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1451   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1452   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1453   if (iascii || isdraw || isbinary || issocket) {
1454     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1455   }
1456   PetscFunctionReturn(0);
1457 }
1458 
1459 #undef __FUNCT__
1460 #define __FUNCT__ "MatSOR_MPIAIJ"
1461 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1462 {
1463   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1464   PetscErrorCode ierr;
1465   Vec            bb1 = 0;
1466   PetscBool      hasop;
1467 
1468   PetscFunctionBegin;
1469   if (flag == SOR_APPLY_UPPER) {
1470     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1471     PetscFunctionReturn(0);
1472   }
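  /* The sweeps below use the row-block splitting of the parallel matrix into
     the diagonal part A (acting on owned unknowns) and the off-diagonal part
     B (acting on ghost values gathered into mat->lvec): each outer iteration
     forms bb1 = bb - B*x_ghost and runs the requested local sweep on A, i.e.
     a block-Jacobi outer iteration with SOR applied to each diagonal block.
     A caller would reach this path through, e.g.,
       ierr = MatSOR(mat,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr); */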
1473 
1474   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1475     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1476   }
1477 
1478   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1479     if (flag & SOR_ZERO_INITIAL_GUESS) {
1480       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1481       its--;
1482     }
1483 
1484     while (its--) {
1485       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1486       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1487 
1488       /* update rhs: bb1 = bb - B*x */
1489       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1490       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1491 
1492       /* local sweep */
1493       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1494     }
1495   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1496     if (flag & SOR_ZERO_INITIAL_GUESS) {
1497       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1498       its--;
1499     }
1500     while (its--) {
1501       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1502       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1503 
1504       /* update rhs: bb1 = bb - B*x */
1505       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1506       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1507 
1508       /* local sweep */
1509       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1510     }
1511   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1512     if (flag & SOR_ZERO_INITIAL_GUESS) {
1513       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1514       its--;
1515     }
1516     while (its--) {
1517       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1518       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1519 
1520       /* update rhs: bb1 = bb - B*x */
1521       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1522       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1523 
1524       /* local sweep */
1525       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1526     }
1527   } else if (flag & SOR_EISENSTAT) {
1528     Vec xx1;
1529 
1530     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1531     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1532 
1533     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1534     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1535     if (!mat->diag) {
1536       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1537       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1538     }
1539     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1540     if (hasop) {
1541       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1542     } else {
1543       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1544     }
1545     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1546 
1547     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1548 
1549     /* local sweep */
1550     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1551     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1552     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1553   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1554 
1555   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1556   PetscFunctionReturn(0);
1557 }
1558 
1559 #undef __FUNCT__
1560 #define __FUNCT__ "MatPermute_MPIAIJ"
1561 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1562 {
1563   Mat            aA,aB,Aperm;
1564   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1565   PetscScalar    *aa,*ba;
1566   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1567   PetscSF        rowsf,sf;
1568   IS             parcolp = NULL;
1569   PetscBool      done;
1570   PetscErrorCode ierr;
1571 
1572   PetscFunctionBegin;
1573   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1574   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1575   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1576   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1577 
1578   /* Invert row permutation to find out where my rows should go */
1579   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1580   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1581   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1582   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1583   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1584   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
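  /* Sketch of the trick above: leaf i of rowsf references global index
     rwant[i], so reducing work[i] = rstart+i with MPIU_REPLACE deposits each
     local row's identity at the root it references. The owner of row rwant[i]
     thus learns which source row maps there, i.e. the inverse permutation,
     without ever forming it explicitly. */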
1585 
1586   /* Invert column permutation to find out where my columns should go */
1587   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1588   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1589   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1590   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1591   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1592   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1593   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1594 
1595   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1596   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1597   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1598 
1599   /* Find out where my gcols should go */
1600   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1601   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1602   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1603   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1604   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1605   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1606   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1607   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1608 
1609   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1610   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1611   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1612   for (i=0; i<m; i++) {
1613     PetscInt row = rdest[i],rowner;
1614     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1615     for (j=ai[i]; j<ai[i+1]; j++) {
1616       PetscInt cowner,col = cdest[aj[j]];
1617       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1618       if (rowner == cowner) dnnz[i]++;
1619       else onnz[i]++;
1620     }
1621     for (j=bi[i]; j<bi[i+1]; j++) {
1622       PetscInt cowner,col = gcdest[bj[j]];
1623       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1624       if (rowner == cowner) dnnz[i]++;
1625       else onnz[i]++;
1626     }
1627   }
1628   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1629   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1630   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1631   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1632   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1633 
1634   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1635   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1636   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1637   for (i=0; i<m; i++) {
1638     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1639     PetscInt j0,rowlen;
1640     rowlen = ai[i+1] - ai[i];
1641     for (j0=j=0; j<rowlen; j0=j) { /* rowlen can exceed m, the size of the repurposed scratch arrays, so insert in batches of at most m */
1642       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1643       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1644     }
1645     rowlen = bi[i+1] - bi[i];
1646     for (j0=j=0; j<rowlen; j0=j) {
1647       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1648       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1649     }
1650   }
1651   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1652   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1653   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1654   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1655   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1656   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1657   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1658   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1659   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1660   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1661   *B = Aperm;
1662   PetscFunctionReturn(0);
1663 }
1664 
1665 #undef __FUNCT__
1666 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1667 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1668 {
1669   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1670   Mat            A    = mat->A,B = mat->B;
1671   PetscErrorCode ierr;
1672   PetscReal      isend[5],irecv[5];
1673 
1674   PetscFunctionBegin;
1675   info->block_size = 1.0;
1676   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1677 
1678   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1679   isend[3] = info->memory;  isend[4] = info->mallocs;
1680 
1681   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1682 
1683   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1684   isend[3] += info->memory;  isend[4] += info->mallocs;
1685   if (flag == MAT_LOCAL) {
1686     info->nz_used      = isend[0];
1687     info->nz_allocated = isend[1];
1688     info->nz_unneeded  = isend[2];
1689     info->memory       = isend[3];
1690     info->mallocs      = isend[4];
1691   } else if (flag == MAT_GLOBAL_MAX) {
1692     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1693 
1694     info->nz_used      = irecv[0];
1695     info->nz_allocated = irecv[1];
1696     info->nz_unneeded  = irecv[2];
1697     info->memory       = irecv[3];
1698     info->mallocs      = irecv[4];
1699   } else if (flag == MAT_GLOBAL_SUM) {
1700     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1701 
1702     info->nz_used      = irecv[0];
1703     info->nz_allocated = irecv[1];
1704     info->nz_unneeded  = irecv[2];
1705     info->memory       = irecv[3];
1706     info->mallocs      = irecv[4];
1707   }
1708   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1709   info->fill_ratio_needed = 0;
1710   info->factor_mallocs    = 0;
1711   PetscFunctionReturn(0);
1712 }
1713 
1714 #undef __FUNCT__
1715 #define __FUNCT__ "MatSetOption_MPIAIJ"
1716 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1717 {
1718   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1719   PetscErrorCode ierr;
1720 
1721   PetscFunctionBegin;
1722   switch (op) {
1723   case MAT_NEW_NONZERO_LOCATIONS:
1724   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1725   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1726   case MAT_KEEP_NONZERO_PATTERN:
1727   case MAT_NEW_NONZERO_LOCATION_ERR:
1728   case MAT_USE_INODES:
1729   case MAT_IGNORE_ZERO_ENTRIES:
1730     MatCheckPreallocated(A,1);
1731     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1732     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1733     break;
1734   case MAT_ROW_ORIENTED:
1735     a->roworiented = flg;
1736 
1737     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1738     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1739     break;
1740   case MAT_NEW_DIAGONALS:
1741     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1742     break;
1743   case MAT_IGNORE_OFF_PROC_ENTRIES:
1744     a->donotstash = flg;
1745     break;
1746   case MAT_SPD:
1747     A->spd_set = PETSC_TRUE;
1748     A->spd     = flg;
1749     if (flg) {
1750       A->symmetric                  = PETSC_TRUE;
1751       A->structurally_symmetric     = PETSC_TRUE;
1752       A->symmetric_set              = PETSC_TRUE;
1753       A->structurally_symmetric_set = PETSC_TRUE;
1754     }
1755     break;
1756   case MAT_SYMMETRIC:
1757     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1758     break;
1759   case MAT_STRUCTURALLY_SYMMETRIC:
1760     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1761     break;
1762   case MAT_HERMITIAN:
1763     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1764     break;
1765   case MAT_SYMMETRY_ETERNAL:
1766     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1767     break;
1768   default:
1769     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1770   }
1771   PetscFunctionReturn(0);
1772 }
1773 
1774 #undef __FUNCT__
1775 #define __FUNCT__ "MatGetRow_MPIAIJ"
1776 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1777 {
1778   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1779   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1780   PetscErrorCode ierr;
1781   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1782   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1783   PetscInt       *cmap,*idx_p;
1784 
1785   PetscFunctionBegin;
1786   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1787   mat->getrowactive = PETSC_TRUE;
1788 
1789   if (!mat->rowvalues && (idx || v)) {
1790     /*
1791         allocate enough space to hold information from the longest row.
1792     */
1793     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1794     PetscInt   max = 1,tmp;
1795     for (i=0; i<matin->rmap->n; i++) {
1796       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1797       if (max < tmp) max = tmp;
1798     }
1799     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1800   }
1801 
1802   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1803   lrow = row - rstart;
1804 
1805   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1806   if (!v)   {pvA = 0; pvB = 0;}
1807   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1808   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1809   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1810   nztot = nzA + nzB;
1811 
1812   cmap = mat->garray;
1813   if (v  || idx) {
1814     if (nztot) {
1815       /* Sort by increasing column numbers, assuming A and B already sorted */
1816       PetscInt imark = -1;
1817       if (v) {
1818         *v = v_p = mat->rowvalues;
1819         for (i=0; i<nzB; i++) {
1820           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1821           else break;
1822         }
1823         imark = i;
1824         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1825         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1826       }
1827       if (idx) {
1828         *idx = idx_p = mat->rowindices;
1829         if (imark > -1) {
1830           for (i=0; i<imark; i++) {
1831             idx_p[i] = cmap[cworkB[i]];
1832           }
1833         } else {
1834           for (i=0; i<nzB; i++) {
1835             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1836             else break;
1837           }
1838           imark = i;
1839         }
1840         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1841         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1842       }
1843     } else {
1844       if (idx) *idx = 0;
1845       if (v)   *v   = 0;
1846     }
1847   }
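  /* The merge above returns global columns in ascending order: B entries with
     global column below cstart first, then A entries shifted by cstart, then
     the remaining B entries, relying on A and B each being sorted locally. */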
1848   *nz  = nztot;
1849   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1850   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1851   PetscFunctionReturn(0);
1852 }
1853 
1854 #undef __FUNCT__
1855 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1856 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1857 {
1858   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1859 
1860   PetscFunctionBegin;
1861   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1862   aij->getrowactive = PETSC_FALSE;
1863   PetscFunctionReturn(0);
1864 }
1865 
1866 #undef __FUNCT__
1867 #define __FUNCT__ "MatNorm_MPIAIJ"
1868 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1869 {
1870   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1871   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1872   PetscErrorCode ierr;
1873   PetscInt       i,j,cstart = mat->cmap->rstart;
1874   PetscReal      sum = 0.0;
1875   MatScalar      *v;
1876 
1877   PetscFunctionBegin;
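  /* Norms computed below, writing a_ij for the global entries:
       NORM_FROBENIUS: sqrt(sum_ij |a_ij|^2)  -- local sums, then MPI sum
       NORM_1:         max_j sum_i |a_ij|     -- column sums reduced, then max
       NORM_INFINITY:  max_i sum_j |a_ij|     -- local row sums, then MPI max
     A call such as
       ierr = MatNorm(mat,NORM_1,&nrm);CHKERRQ(ierr);
     dispatches here for an MPIAIJ matrix. */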
1878   if (aij->size == 1) {
1879     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1880   } else {
1881     if (type == NORM_FROBENIUS) {
1882       v = amat->a;
1883       for (i=0; i<amat->nz; i++) {
1884         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1885       }
1886       v = bmat->a;
1887       for (i=0; i<bmat->nz; i++) {
1888         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1889       }
1890       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1891       *norm = PetscSqrtReal(*norm);
1892     } else if (type == NORM_1) { /* max column norm */
1893       PetscReal *tmp,*tmp2;
1894       PetscInt  *jj,*garray = aij->garray;
1895       ierr  = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr);
1896       ierr  = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr);
1897       *norm = 0.0;
1898       v     = amat->a; jj = amat->j;
1899       for (j=0; j<amat->nz; j++) {
1900         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1901       }
1902       v = bmat->a; jj = bmat->j;
1903       for (j=0; j<bmat->nz; j++) {
1904         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1905       }
1906       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1907       for (j=0; j<mat->cmap->N; j++) {
1908         if (tmp2[j] > *norm) *norm = tmp2[j];
1909       }
1910       ierr = PetscFree(tmp);CHKERRQ(ierr);
1911       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1912     } else if (type == NORM_INFINITY) { /* max row norm */
1913       PetscReal ntemp = 0.0;
1914       for (j=0; j<aij->A->rmap->n; j++) {
1915         v   = amat->a + amat->i[j];
1916         sum = 0.0;
1917         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1918           sum += PetscAbsScalar(*v); v++;
1919         }
1920         v = bmat->a + bmat->i[j];
1921         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1922           sum += PetscAbsScalar(*v); v++;
1923         }
1924         if (sum > ntemp) ntemp = sum;
1925       }
1926       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1927     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1928   }
1929   PetscFunctionReturn(0);
1930 }
1931 
1932 #undef __FUNCT__
1933 #define __FUNCT__ "MatTranspose_MPIAIJ"
1934 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1935 {
1936   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1937   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1938   PetscErrorCode ierr;
1939   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1940   PetscInt       cstart = A->cmap->rstart,ncol;
1941   Mat            B;
1942   MatScalar      *array;
1943 
1944   PetscFunctionBegin;
1945   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place transpose");
1946 
1947   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1948   ai = Aloc->i; aj = Aloc->j;
1949   bi = Bloc->i; bj = Bloc->j;
1950   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1951     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1952     PetscSFNode          *oloc;
1953     PETSC_UNUSED PetscSF sf;
1954 
1955     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1956     /* compute d_nnz for preallocation */
1957     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1958     for (i=0; i<ai[ma]; i++) {
1959       d_nnz[aj[i]]++;
1960       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1961     }
1962     /* compute local off-diagonal contributions */
1963     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1964     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1965     /* map those to global */
1966     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1967     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1968     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1969     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1970     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1971     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1972     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
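    /* Preallocation for the transpose: d_nnz counts how often each local
       column of A occurs (a column of A is a row of A^T), while the counts
       g_nnz, indexed by the compressed columns of B, are summed onto their
       owning ranks through the star forest built on garray, yielding o_nnz. */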
1973 
1974     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1975     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1976     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1977     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1978     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1979     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1980   } else {
1981     B    = *matout;
1982     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1983     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1984   }
1985 
1986   /* copy over the A part */
1987   array = Aloc->a;
1988   row   = A->rmap->rstart;
1989   for (i=0; i<ma; i++) {
1990     ncol = ai[i+1]-ai[i];
1991     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1992     row++;
1993     array += ncol; aj += ncol;
1994   }
1995   aj = Aloc->j;
1996   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */
1997 
1998   /* copy over the B part */
1999   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2000   array = Bloc->a;
2001   row   = A->rmap->rstart;
2002   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2003   cols_tmp = cols;
2004   for (i=0; i<mb; i++) {
2005     ncol = bi[i+1]-bi[i];
2006     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2007     row++;
2008     array += ncol; cols_tmp += ncol;
2009   }
2010   ierr = PetscFree(cols);CHKERRQ(ierr);
2011 
2012   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2013   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2014   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2015     *matout = B;
2016   } else {
2017     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2018   }
2019   PetscFunctionReturn(0);
2020 }
2021 
2022 #undef __FUNCT__
2023 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2024 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2025 {
2026   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2027   Mat            a    = aij->A,b = aij->B;
2028   PetscErrorCode ierr;
2029   PetscInt       s1,s2,s3;
2030 
2031   PetscFunctionBegin;
2032   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2033   if (rr) {
2034     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2035     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2036     /* Overlap communication with computation. */
2037     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2038   }
2039   if (ll) {
2040     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2041     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2042     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2043   }
2044   /* scale the diagonal block */
2045   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2046 
2047   if (rr) {
2048     /* Do a scatter end and then right scale the off-diagonal block */
2049     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2050     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2051   }
2052   PetscFunctionReturn(0);
2053 }
2054 
2055 #undef __FUNCT__
2056 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2057 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2058 {
2059   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2060   PetscErrorCode ierr;
2061 
2062   PetscFunctionBegin;
2063   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2064   PetscFunctionReturn(0);
2065 }
2066 
2067 #undef __FUNCT__
2068 #define __FUNCT__ "MatEqual_MPIAIJ"
2069 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2070 {
2071   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2072   Mat            a,b,c,d;
2073   PetscBool      flg;
2074   PetscErrorCode ierr;
2075 
2076   PetscFunctionBegin;
2077   a = matA->A; b = matA->B;
2078   c = matB->A; d = matB->B;
2079 
2080   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2081   if (flg) {
2082     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2083   }
2084   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2085   PetscFunctionReturn(0);
2086 }
2087 
2088 #undef __FUNCT__
2089 #define __FUNCT__ "MatCopy_MPIAIJ"
2090 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2091 {
2092   PetscErrorCode ierr;
2093   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2094   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2095 
2096   PetscFunctionBegin;
2097   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2098   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2099     /* Because of the column compression in the off-process part of the matrix a->B,
2100        the number of columns in a->B and b->B may differ, so we cannot call MatCopy()
2101        directly on the two parts. If need be, a copy more efficient than MatCopy_Basic()
2102        could be provided by first uncompressing the off-process parts and then copying
2103        the submatrices. */
2104     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2105   } else {
2106     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2107     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2108   }
2109   PetscFunctionReturn(0);
2110 }
2111 
2112 #undef __FUNCT__
2113 #define __FUNCT__ "MatSetUp_MPIAIJ"
2114 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2115 {
2116   PetscErrorCode ierr;
2117 
2118   PetscFunctionBegin;
2119   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2120   PetscFunctionReturn(0);
2121 }
2122 
2123 /*
2124    Computes the number of nonzeros per row needed for preallocation when X and Y
2125    have different nonzero structure.
2126 */
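/* A small worked example: for a row where X has global columns {0,3,5} and Y
   has {3,4}, the merge below visits 0 (X only), 3 (shared, counted once),
   4 (Y only), and 5 (X only), giving nnz = 4 for that row. */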
2127 #undef __FUNCT__
2128 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2129 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2130 {
2131   PetscInt       i,j,k,nzx,nzy;
2132 
2133   PetscFunctionBegin;
2134   /* Set the number of nonzeros in the new matrix */
2135   for (i=0; i<m; i++) {
2136     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2137     nzx = xi[i+1] - xi[i];
2138     nzy = yi[i+1] - yi[i];
2139     nnz[i] = 0;
2140     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2141       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2142       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2143       nnz[i]++;
2144     }
2145     for (; k<nzy; k++) nnz[i]++;
2146   }
2147   PetscFunctionReturn(0);
2148 }
2149 
2150 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2151 #undef __FUNCT__
2152 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2153 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2154 {
2155   PetscErrorCode ierr;
2156   PetscInt       m = Y->rmap->N;
2157   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2158   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2159 
2160   PetscFunctionBegin;
2161   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2162   PetscFunctionReturn(0);
2163 }
2164 
2165 #undef __FUNCT__
2166 #define __FUNCT__ "MatAXPY_MPIAIJ"
2167 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2168 {
2169   PetscErrorCode ierr;
2170   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2171   PetscBLASInt   bnz,one=1;
2172   Mat_SeqAIJ     *x,*y;
2173 
2174   PetscFunctionBegin;
2175   if (str == SAME_NONZERO_PATTERN) {
2176     PetscScalar alpha = a;
2177     x    = (Mat_SeqAIJ*)xx->A->data;
2178     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2179     y    = (Mat_SeqAIJ*)yy->A->data;
2180     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2181     x    = (Mat_SeqAIJ*)xx->B->data;
2182     y    = (Mat_SeqAIJ*)yy->B->data;
2183     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2184     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2185     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2186   } else if (str == SUBSET_NONZERO_PATTERN) {
2187     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2188   } else {
2189     Mat      B;
2190     PetscInt *nnz_d,*nnz_o;
2191     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2192     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2193     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2194     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2195     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2196     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2197     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2198     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2199     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2200     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2201     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2202     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2203     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2204     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2205   }
2206   PetscFunctionReturn(0);
2207 }
2208 
2209 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2210 
2211 #undef __FUNCT__
2212 #define __FUNCT__ "MatConjugate_MPIAIJ"
2213 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2214 {
2215 #if defined(PETSC_USE_COMPLEX)
2216   PetscErrorCode ierr;
2217   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2218 
2219   PetscFunctionBegin;
2220   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2221   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2222 #else
2223   PetscFunctionBegin;
2224 #endif
2225   PetscFunctionReturn(0);
2226 }
2227 
2228 #undef __FUNCT__
2229 #define __FUNCT__ "MatRealPart_MPIAIJ"
2230 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2231 {
2232   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2233   PetscErrorCode ierr;
2234 
2235   PetscFunctionBegin;
2236   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2237   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2238   PetscFunctionReturn(0);
2239 }
2240 
2241 #undef __FUNCT__
2242 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2243 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2244 {
2245   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2246   PetscErrorCode ierr;
2247 
2248   PetscFunctionBegin;
2249   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2250   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2251   PetscFunctionReturn(0);
2252 }
2253 
2254 #if defined(PETSC_HAVE_PBGL)
2255 
2256 #include <boost/parallel/mpi/bsp_process_group.hpp>
2257 #include <boost/graph/distributed/ilu_default_graph.hpp>
2258 #include <boost/graph/distributed/ilu_0_block.hpp>
2259 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2260 #include <boost/graph/distributed/petsc/interface.hpp>
2261 #include <boost/multi_array.hpp>
2262 #include <boost/parallel/distributed_property_map.hpp>
2263 
2264 #undef __FUNCT__
2265 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2266 /*
2267   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2268 */
2269 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2270 {
2271   namespace petsc = boost::distributed::petsc;
2272 
2273   namespace graph_dist = boost::graph::distributed;
2274   using boost::graph::distributed::ilu_default::process_group_type;
2275   using boost::graph::ilu_permuted;
2276 
2277   PetscBool      row_identity, col_identity;
2278   PetscContainer c;
2279   PetscInt       m, n, M, N;
2280   PetscErrorCode ierr;
2281 
2282   PetscFunctionBegin;
2283   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2284   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2285   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2286   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2287 
2288   process_group_type pg;
2289   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2290   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2291   lgraph_type& level_graph = *lgraph_p;
2292   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2293 
2294   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2295   ilu_permuted(level_graph);
2296 
2297   /* put together the new matrix */
2298   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2299   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2300   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2301   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2302   ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
2303   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2304   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2305   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2306 
2307   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr);
2308   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2309   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2310   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2311   PetscFunctionReturn(0);
2312 }
2313 
2314 #undef __FUNCT__
2315 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2316 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2317 {
2318   PetscFunctionBegin;
2319   PetscFunctionReturn(0);
2320 }
2321 
2322 #undef __FUNCT__
2323 #define __FUNCT__ "MatSolve_MPIAIJ"
2324 /*
2325   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2326 */
2327 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2328 {
2329   namespace graph_dist = boost::graph::distributed;
2330 
2331   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2332   lgraph_type    *lgraph_p;
2333   PetscContainer c;
2334   PetscErrorCode ierr;
2335 
2336   PetscFunctionBegin;
2337   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2338   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2339   ierr = VecCopy(b, x);CHKERRQ(ierr);
2340 
2341   PetscScalar *array_x;
2342   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2343   PetscInt sx;
2344   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2345 
2346   PetscScalar *array_b;
2347   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2348   PetscInt sb;
2349   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2350 
2351   lgraph_type& level_graph = *lgraph_p;
2352   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2353 
2354   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2355   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2356   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2357 
2358   typedef boost::iterator_property_map<array_ref_type::iterator,
2359                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2360   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2361   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2362 
2363   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2364   PetscFunctionReturn(0);
2365 }
2366 #endif
2367 
2368 
2369 #undef __FUNCT__
2370 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced"
2371 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2372 {
2373   PetscMPIInt    rank,size;
2374   MPI_Comm       comm;
2375   PetscErrorCode ierr;
2376   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2377   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2378   PetscInt       *rowrange = mat->rmap->range;
2379   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2380   Mat            A = aij->A,B=aij->B,C=*matredundant;
2381   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2382   PetscScalar    *sbuf_a;
2383   PetscInt       nzlocal=a->nz+b->nz;
2384   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2385   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2386   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2387   MatScalar      *aworkA,*aworkB;
2388   PetscScalar    *vals;
2389   PetscMPIInt    tag1,tag2,tag3,imdex;
2390   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2391   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2392   MPI_Status     recv_status,*send_status;
2393   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2394   PetscInt       **rbuf_j=NULL;
2395   PetscScalar    **rbuf_a=NULL;
2396   Mat_Redundant  *redund =NULL;
2397 
2398   PetscFunctionBegin;
2399   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2400   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2401   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2402   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2403   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2404 
2405   if (reuse == MAT_REUSE_MATRIX) {
2406     if (M != C->rmap->N || N != C->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2407     if (subsize == 1) {
2408       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2409       redund = c->redundant;
2410     } else {
2411       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2412       redund = c->redundant;
2413     }
2414     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2415 
2416     nsends    = redund->nsends;
2417     nrecvs    = redund->nrecvs;
2418     send_rank = redund->send_rank;
2419     recv_rank = redund->recv_rank;
2420     sbuf_nz   = redund->sbuf_nz;
2421     rbuf_nz   = redund->rbuf_nz;
2422     sbuf_j    = redund->sbuf_j;
2423     sbuf_a    = redund->sbuf_a;
2424     rbuf_j    = redund->rbuf_j;
2425     rbuf_a    = redund->rbuf_a;
2426   }
2427 
2428   if (reuse == MAT_INITIAL_MATRIX) {
2429     PetscInt    nleftover,np_subcomm;
2430 
2431     /* get the destination processors' id send_rank, nsends and nrecvs */
2432     ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr);
2433 
2434     np_subcomm = size/nsubcomm;
2435     nleftover  = size - nsubcomm*np_subcomm;
2436 
2437     /* the block of code below is specific to the INTERLACED subcommunicator layout */
2438     /* ----------------------------------------------------------------------------*/
2439     nsends = 0; nrecvs = 0;
2440     for (i=0; i<size; i++) {
2441       if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2442         send_rank[nsends++] = i;
2443         recv_rank[nrecvs++] = i;
2444       }
2445     }
2446     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2447       i = size-nleftover-1;
2448       j = 0;
2449       while (j < nsubcomm - nleftover) {
2450         send_rank[nsends++] = i;
2451         i--; j++;
2452       }
2453     }
2454 
2455     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2456       for (i=0; i<nleftover; i++) {
2457         recv_rank[nrecvs++] = size-nleftover+i;
2458       }
2459     }
2460     /*----------------------------------------------*/
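    /* A sketch of the interlaced layout, assuming size=6 and nsubcomm=2:
       subcomm 0 holds ranks {0,2,4}, subcomm 1 holds {1,3,5}, and subrank is
       rank/nsubcomm, so the test above groups ranks {2k,2k+1}; each rank
       exchanges its local rows with the other members of its group. */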
2461 
2462     /* allocate sbuf_j, sbuf_a */
2463     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2464     ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr);
2465     ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr);
2466     /*
2467     ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr);
2468     ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr);
2469      */
2470   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
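  /* sbuf_j packs the local rows in CSR-like form: rend-rstart+1 row pointers
     first, followed by every row's global column indices; sbuf_a holds the
     matching nzlocal values in the same order. */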
2471 
2472   /* copy mat's local entries into the buffers */
2473   if (reuse == MAT_INITIAL_MATRIX) {
2474     rownz_max = 0;
2475     rptr      = sbuf_j;
2476     cols      = sbuf_j + rend-rstart + 1;
2477     vals      = sbuf_a;
2478     rptr[0]   = 0;
2479     for (i=0; i<rend-rstart; i++) {
2480       row    = i + rstart;
2481       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2482       ncols  = nzA + nzB;
2483       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2484       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2485       /* load the column indices for this row into cols */
2486       lwrite = 0;
2487       for (l=0; l<nzB; l++) {
2488         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2489           vals[lwrite]   = aworkB[l];
2490           cols[lwrite++] = ctmp;
2491         }
2492       }
2493       for (l=0; l<nzA; l++) {
2494         vals[lwrite]   = aworkA[l];
2495         cols[lwrite++] = cstart + cworkA[l];
2496       }
2497       for (l=0; l<nzB; l++) {
2498         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2499           vals[lwrite]   = aworkB[l];
2500           cols[lwrite++] = ctmp;
2501         }
2502       }
2503       vals     += ncols;
2504       cols     += ncols;
2505       rptr[i+1] = rptr[i] + ncols;
2506       if (rownz_max < ncols) rownz_max = ncols;
2507     }
2508     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2509   } else { /* only copy matrix values into sbuf_a */
2510     rptr    = sbuf_j;
2511     vals    = sbuf_a;
2512     rptr[0] = 0;
2513     for (i=0; i<rend-rstart; i++) {
2514       row    = i + rstart;
2515       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2516       ncols  = nzA + nzB;
2517       cworkB = b->j + b->i[i];
2518       aworkA = a->a + a->i[i];
2519       aworkB = b->a + b->i[i];
2520       lwrite = 0;
2521       for (l=0; l<nzB; l++) {
2522         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2523       }
2524       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2525       for (l=0; l<nzB; l++) {
2526         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2527       }
2528       vals     += ncols;
2529       rptr[i+1] = rptr[i] + ncols;
2530     }
2531   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2532 
2533   /* send nzlocal to others, and recv other's nzlocal */
2534   /*--------------------------------------------------*/
2535   if (reuse == MAT_INITIAL_MATRIX) {
2536     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2537 
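    /* six wait arrays packed into one allocation: [s_waits3|s_waits2|s_waits1|r_waits1|r_waits2|r_waits3] */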
2538     s_waits2 = s_waits3 + nsends;
2539     s_waits1 = s_waits2 + nsends;
2540     r_waits1 = s_waits1 + nsends;
2541     r_waits2 = r_waits1 + nrecvs;
2542     r_waits3 = r_waits2 + nrecvs;
2543   } else {
2544     ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2545 
2546     r_waits3 = s_waits3 + nsends;
2547   }
2548 
2549   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2550   if (reuse == MAT_INITIAL_MATRIX) {
2551     /* get new tags to keep the communication clean */
2552     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2553     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2554     ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr);
2555 
2556     /* post receives of other's nzlocal */
2557     for (i=0; i<nrecvs; i++) {
2558       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2559     }
2560     /* send nzlocal to others */
2561     for (i=0; i<nsends; i++) {
2562       sbuf_nz[i] = nzlocal;
2563       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2564     }
2565     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2566     count = nrecvs;
2567     while (count) {
2568       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2569 
2570       recv_rank[imdex] = recv_status.MPI_SOURCE;
2571       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2572       ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr);
2573 
2574       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2575 
2576       rbuf_nz[imdex] += i + 2;
2577 
2578       ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr);
2579       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2580       count--;
2581     }
2582     /* wait on sends of nzlocal */
2583     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2584     /* send mat->i,j to others, and recv from others */
2585     /*------------------------------------------------*/
2586     for (i=0; i<nsends; i++) {
2587       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2588       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2589     }
2590     /* wait on receives of mat->i,j */
2591     /*------------------------------*/
2592     count = nrecvs;
2593     while (count) {
2594       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2595       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2596       count--;
2597     }
2598     /* wait on sends of mat->i,j */
2599     /*---------------------------*/
2600     if (nsends) {
2601       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2602     }
2603   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2604 
2605   /* post receives, send and receive mat->a */
2606   /*----------------------------------------*/
2607   for (imdex=0; imdex<nrecvs; imdex++) {
2608     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2609   }
2610   for (i=0; i<nsends; i++) {
2611     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2612   }
2613   count = nrecvs;
2614   while (count) {
2615     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2616     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2617     count--;
2618   }
2619   if (nsends) {
2620     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2621   }
2622 
2623   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2624 
2625   /* create redundant matrix */
2626   /*-------------------------*/
2627   if (reuse == MAT_INITIAL_MATRIX) {
2628     const PetscInt *range;
2629     PetscInt       rstart_sub,rend_sub,mloc_sub;
2630 
2631     /* compute rownz_max for preallocation */
2632     for (imdex=0; imdex<nrecvs; imdex++) {
2633       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2634       rptr = rbuf_j[imdex];
2635       for (i=0; i<j; i++) {
2636         ncols = rptr[i+1] - rptr[i];
2637         if (rownz_max < ncols) rownz_max = ncols;
2638       }
2639     }
2640 
2641     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2642 
2643     /* get local size of redundant matrix
2644        - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED; it works for other types, but may not be efficient! */
2645     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2646     rstart_sub = range[nsubcomm*subrank];
2647     if (subrank+1 < subsize) { /* not the last proc in subcomm */
2648       rend_sub = range[nsubcomm*(subrank+1)];
2649     } else {
2650       rend_sub = mat->rmap->N;
2651     }
2652     mloc_sub = rend_sub - rstart_sub;
2653 
2654     if (M == N) {
2655       ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2656     } else { /* non-square matrix */
2657       ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
2658     }
2659     ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr);
2660     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2661     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2662     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2663   } else {
2664     C = *matredundant;
2665   }
2666 
2667   /* insert local matrix entries */
2668   rptr = sbuf_j;
2669   cols = sbuf_j + rend-rstart + 1;
2670   vals = sbuf_a;
2671   for (i=0; i<rend-rstart; i++) {
2672     row   = i + rstart;
2673     ncols = rptr[i+1] - rptr[i];
2674     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2675     vals += ncols;
2676     cols += ncols;
2677   }
2678   /* insert received matrix entries */
2679   for (imdex=0; imdex<nrecvs; imdex++) {
2680     rstart = rowrange[recv_rank[imdex]];
2681     rend   = rowrange[recv_rank[imdex]+1];
2682     /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
2683     rptr   = rbuf_j[imdex];
2684     cols   = rbuf_j[imdex] + rend-rstart + 1;
2685     vals   = rbuf_a[imdex];
2686     for (i=0; i<rend-rstart; i++) {
2687       row   = i + rstart;
2688       ncols = rptr[i+1] - rptr[i];
2689       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2690       vals += ncols;
2691       cols += ncols;
2692     }
2693   }
2694   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2695   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2696 
2697   if (reuse == MAT_INITIAL_MATRIX) {
2698     *matredundant = C;
2699 
2700     /* create a supporting struct and attach it to C for reuse */
2701     ierr = PetscNewLog(C,&redund);CHKERRQ(ierr);
2702     if (subsize == 1) {
2703       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2704       c->redundant = redund;
2705     } else {
2706       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2707       c->redundant = redund;
2708     }
2709 
2710     redund->nzlocal   = nzlocal;
2711     redund->nsends    = nsends;
2712     redund->nrecvs    = nrecvs;
2713     redund->send_rank = send_rank;
2714     redund->recv_rank = recv_rank;
2715     redund->sbuf_nz   = sbuf_nz;
2716     redund->rbuf_nz   = rbuf_nz;
2717     redund->sbuf_j    = sbuf_j;
2718     redund->sbuf_a    = sbuf_a;
2719     redund->rbuf_j    = rbuf_j;
2720     redund->rbuf_a    = rbuf_a;
2721     redund->psubcomm  = NULL;
2722   }
2723   PetscFunctionReturn(0);
2724 }
2725 
2726 #undef __FUNCT__
2727 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2728 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2729 {
2730   PetscErrorCode ierr;
2731   MPI_Comm       comm;
2732   PetscMPIInt    size,subsize;
2733   PetscInt       mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N;
2734   Mat_Redundant  *redund=NULL;
2735   PetscSubcomm   psubcomm=NULL;
2736   MPI_Comm       subcomm_in=subcomm;
2737   Mat            *matseq;
2738   IS             isrow,iscol;
2739 
2740   PetscFunctionBegin;
2741   if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */
2742     if (reuse ==  MAT_INITIAL_MATRIX) {
2743       /* create psubcomm, then get subcomm */
2744       ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2745       ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2746       if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must be between 1 and %D",size);
2747 
2748       ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
2749       ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
2750       ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
2751       ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
2752       subcomm = psubcomm->comm;
2753     } else { /* retrieve psubcomm and subcomm */
2754       ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr);
2755       ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2756       if (subsize == 1) {
2757         Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2758         redund = c->redundant;
2759       } else {
2760         Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2761         redund = c->redundant;
2762       }
2763       psubcomm = redund->psubcomm;
2764     }
2765     if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2766       ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr);
2767       if (reuse ==  MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_Redundant() */
2768         ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
2769         if (subsize == 1) {
2770           Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2771           c->redundant->psubcomm = psubcomm;
2772         } else {
2773           Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2774           c->redundant->psubcomm = psubcomm;
2775         }
2776       }
2777       PetscFunctionReturn(0);
2778     }
2779   }
2780 
2781   /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */
2782   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2783   if (reuse == MAT_INITIAL_MATRIX) {
2784     /* create a local sequential matrix matseq[0] */
2785     mloc_sub = PETSC_DECIDE;
2786     ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr);
2787     ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
2788     rstart = rend - mloc_sub;
2789     ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
2790     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
2791   } else { /* reuse == MAT_REUSE_MATRIX */
2792     if (subsize == 1) {
2793       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2794       redund = c->redundant;
2795     } else {
2796       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2797       redund = c->redundant;
2798     }
2799 
2800     isrow  = redund->isrow;
2801     iscol  = redund->iscol;
2802     matseq = redund->matseq;
2803   }
2804   ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
2805   ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);
2806 
2807   if (reuse == MAT_INITIAL_MATRIX) {
2808     /* create a supporting struct and attach it to C for reuse */
2809     ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr);
2810     if (subsize == 1) {
2811       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2812       c->redundant = redund;
2813     } else {
2814       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2815       c->redundant = redund;
2816     }
2817     redund->isrow    = isrow;
2818     redund->iscol    = iscol;
2819     redund->matseq   = matseq;
2820     redund->psubcomm = psubcomm;
2821   }
2822   PetscFunctionReturn(0);
2823 }
2824 
2825 #undef __FUNCT__
2826 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2827 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2828 {
2829   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2830   PetscErrorCode ierr;
2831   PetscInt       i,*idxb = 0;
2832   PetscScalar    *va,*vb;
2833   Vec            vtmp;
2834 
2835   PetscFunctionBegin;
2836   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2837   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2838   if (idx) {
2839     for (i=0; i<A->rmap->n; i++) {
2840       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2841     }
2842   }
2843 
2844   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2845   if (idx) {
2846     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2847   }
2848   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2849   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2850 
2851   for (i=0; i<A->rmap->n; i++) {
2852     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2853       va[i] = vb[i];
2854       if (idx) idx[i] = a->garray[idxb[i]];
2855     }
2856   }
2857 
2858   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2859   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2860   ierr = PetscFree(idxb);CHKERRQ(ierr);
2861   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2862   PetscFunctionReturn(0);
2863 }
2864 
2865 #undef __FUNCT__
2866 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2867 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2868 {
2869   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2870   PetscErrorCode ierr;
2871   PetscInt       i,*idxb = 0;
2872   PetscScalar    *va,*vb;
2873   Vec            vtmp;
2874 
2875   PetscFunctionBegin;
2876   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2877   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2878   if (idx) {
2879     for (i=0; i<A->rmap->n; i++) {
2880       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2881     }
2882   }
2883 
2884   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2885   if (idx) {
2886     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2887   }
2888   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2889   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2890 
2891   for (i=0; i<A->rmap->n; i++) {
2892     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2893       va[i] = vb[i];
2894       if (idx) idx[i] = a->garray[idxb[i]];
2895     }
2896   }
2897 
2898   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2899   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2900   ierr = PetscFree(idxb);CHKERRQ(ierr);
2901   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2902   PetscFunctionReturn(0);
2903 }
2904 
2905 #undef __FUNCT__
2906 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2907 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2908 {
2909   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2910   PetscInt       n      = A->rmap->n;
2911   PetscInt       cstart = A->cmap->rstart;
2912   PetscInt       *cmap  = mat->garray;
2913   PetscInt       *diagIdx, *offdiagIdx;
2914   Vec            diagV, offdiagV;
2915   PetscScalar    *a, *diagA, *offdiagA;
2916   PetscInt       r;
2917   PetscErrorCode ierr;
2918 
2919   PetscFunctionBegin;
2920   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2921   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2922   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2923   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2924   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2925   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2926   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2927   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2928   for (r = 0; r < n; ++r) {
2929     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2930       a[r]   = diagA[r];
2931       idx[r] = cstart + diagIdx[r];
2932     } else {
2933       a[r]   = offdiagA[r];
2934       idx[r] = cmap[offdiagIdx[r]];
2935     }
2936   }
2937   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2938   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2939   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2940   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2941   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2942   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2943   PetscFunctionReturn(0);
2944 }
2945 
2946 #undef __FUNCT__
2947 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2948 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2949 {
2950   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2951   PetscInt       n      = A->rmap->n;
2952   PetscInt       cstart = A->cmap->rstart;
2953   PetscInt       *cmap  = mat->garray;
2954   PetscInt       *diagIdx, *offdiagIdx;
2955   Vec            diagV, offdiagV;
2956   PetscScalar    *a, *diagA, *offdiagA;
2957   PetscInt       r;
2958   PetscErrorCode ierr;
2959 
2960   PetscFunctionBegin;
2961   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2962   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2963   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2964   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2965   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2966   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2967   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2968   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2969   for (r = 0; r < n; ++r) {
2970     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2971       a[r]   = diagA[r];
2972       idx[r] = cstart + diagIdx[r];
2973     } else {
2974       a[r]   = offdiagA[r];
2975       idx[r] = cmap[offdiagIdx[r]];
2976     }
2977   }
2978   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2979   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2980   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2981   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2982   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2983   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2984   PetscFunctionReturn(0);
2985 }
2986 
2987 #undef __FUNCT__
2988 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2989 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2990 {
2991   PetscErrorCode ierr;
2992   Mat            *dummy;
2993 
2994   PetscFunctionBegin;
2995   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2996   *newmat = *dummy;
2997   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2998   PetscFunctionReturn(0);
2999 }
3000 
3001 #undef __FUNCT__
3002 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3003 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3004 {
3005   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3006   PetscErrorCode ierr;
3007 
3008   PetscFunctionBegin;
3009   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3010   PetscFunctionReturn(0);
3011 }
3012 
3013 #undef __FUNCT__
3014 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3015 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3016 {
3017   PetscErrorCode ierr;
3018   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3019 
3020   PetscFunctionBegin;
3021   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3022   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3023   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3024   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3025   PetscFunctionReturn(0);
3026 }
3027 
3028 /* -------------------------------------------------------------------*/
3029 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3030                                        MatGetRow_MPIAIJ,
3031                                        MatRestoreRow_MPIAIJ,
3032                                        MatMult_MPIAIJ,
3033                                 /* 4*/ MatMultAdd_MPIAIJ,
3034                                        MatMultTranspose_MPIAIJ,
3035                                        MatMultTransposeAdd_MPIAIJ,
3036 #if defined(PETSC_HAVE_PBGL)
3037                                        MatSolve_MPIAIJ,
3038 #else
3039                                        0,
3040 #endif
3041                                        0,
3042                                        0,
3043                                 /*10*/ 0,
3044                                        0,
3045                                        0,
3046                                        MatSOR_MPIAIJ,
3047                                        MatTranspose_MPIAIJ,
3048                                 /*15*/ MatGetInfo_MPIAIJ,
3049                                        MatEqual_MPIAIJ,
3050                                        MatGetDiagonal_MPIAIJ,
3051                                        MatDiagonalScale_MPIAIJ,
3052                                        MatNorm_MPIAIJ,
3053                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3054                                        MatAssemblyEnd_MPIAIJ,
3055                                        MatSetOption_MPIAIJ,
3056                                        MatZeroEntries_MPIAIJ,
3057                                 /*24*/ MatZeroRows_MPIAIJ,
3058                                        0,
3059 #if defined(PETSC_HAVE_PBGL)
3060                                        0,
3061 #else
3062                                        0,
3063 #endif
3064                                        0,
3065                                        0,
3066                                 /*29*/ MatSetUp_MPIAIJ,
3067 #if defined(PETSC_HAVE_PBGL)
3068                                        0,
3069 #else
3070                                        0,
3071 #endif
3072                                        0,
3073                                        0,
3074                                        0,
3075                                 /*34*/ MatDuplicate_MPIAIJ,
3076                                        0,
3077                                        0,
3078                                        0,
3079                                        0,
3080                                 /*39*/ MatAXPY_MPIAIJ,
3081                                        MatGetSubMatrices_MPIAIJ,
3082                                        MatIncreaseOverlap_MPIAIJ,
3083                                        MatGetValues_MPIAIJ,
3084                                        MatCopy_MPIAIJ,
3085                                 /*44*/ MatGetRowMax_MPIAIJ,
3086                                        MatScale_MPIAIJ,
3087                                        0,
3088                                        0,
3089                                        MatZeroRowsColumns_MPIAIJ,
3090                                 /*49*/ MatSetRandom_MPIAIJ,
3091                                        0,
3092                                        0,
3093                                        0,
3094                                        0,
3095                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
3096                                        0,
3097                                        MatSetUnfactored_MPIAIJ,
3098                                        MatPermute_MPIAIJ,
3099                                        0,
3100                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3101                                        MatDestroy_MPIAIJ,
3102                                        MatView_MPIAIJ,
3103                                        0,
3104                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3105                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3106                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3107                                        0,
3108                                        0,
3109                                        0,
3110                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3111                                        MatGetRowMinAbs_MPIAIJ,
3112                                        0,
3113                                        MatSetColoring_MPIAIJ,
3114                                        0,
3115                                        MatSetValuesAdifor_MPIAIJ,
3116                                 /*75*/ MatFDColoringApply_AIJ,
3117                                        0,
3118                                        0,
3119                                        0,
3120                                        MatFindZeroDiagonals_MPIAIJ,
3121                                 /*80*/ 0,
3122                                        0,
3123                                        0,
3124                                 /*83*/ MatLoad_MPIAIJ,
3125                                        0,
3126                                        0,
3127                                        0,
3128                                        0,
3129                                        0,
3130                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3131                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3132                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3133                                        MatPtAP_MPIAIJ_MPIAIJ,
3134                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3135                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3136                                        0,
3137                                        0,
3138                                        0,
3139                                        0,
3140                                 /*99*/ 0,
3141                                        0,
3142                                        0,
3143                                        MatConjugate_MPIAIJ,
3144                                        0,
3145                                 /*104*/MatSetValuesRow_MPIAIJ,
3146                                        MatRealPart_MPIAIJ,
3147                                        MatImaginaryPart_MPIAIJ,
3148                                        0,
3149                                        0,
3150                                 /*109*/0,
3151                                        MatGetRedundantMatrix_MPIAIJ,
3152                                        MatGetRowMin_MPIAIJ,
3153                                        0,
3154                                        0,
3155                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3156                                        0,
3157                                        0,
3158                                        0,
3159                                        0,
3160                                 /*119*/0,
3161                                        0,
3162                                        0,
3163                                        0,
3164                                        MatGetMultiProcBlock_MPIAIJ,
3165                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3166                                        MatGetColumnNorms_MPIAIJ,
3167                                        MatInvertBlockDiagonal_MPIAIJ,
3168                                        0,
3169                                        MatGetSubMatricesParallel_MPIAIJ,
3170                                 /*129*/0,
3171                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3172                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3173                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3174                                        0,
3175                                 /*134*/0,
3176                                        0,
3177                                        0,
3178                                        0,
3179                                        0,
3180                                 /*139*/0,
3181                                        0,
3182                                        0,
3183                                        MatFDColoringSetUp_MPIXAIJ
3184 };
3185 
3186 /* ----------------------------------------------------------------------------------------*/
3187 
3188 #undef __FUNCT__
3189 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3190 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3191 {
3192   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3193   PetscErrorCode ierr;
3194 
3195   PetscFunctionBegin;
3196   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3197   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3198   PetscFunctionReturn(0);
3199 }
3200 
3201 #undef __FUNCT__
3202 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3203 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3204 {
3205   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3206   PetscErrorCode ierr;
3207 
3208   PetscFunctionBegin;
3209   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3210   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3211   PetscFunctionReturn(0);
3212 }
3213 
3214 #undef __FUNCT__
3215 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3216 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3217 {
3218   Mat_MPIAIJ     *b;
3219   PetscErrorCode ierr;
3220 
3221   PetscFunctionBegin;
3222   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3223   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3224   b = (Mat_MPIAIJ*)B->data;
3225 
3226   if (!B->preallocated) {
3227     /* Explicitly create 2 MATSEQAIJ matrices. */
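     /* b->A holds the diagonal block (n local columns); b->B holds the off-diagonal block and is
        created with the full global column width, to be compacted to its nonzero columns at assembly */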
3228     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3229     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3230     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
3231     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3232     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
3233     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3234     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3235     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
3236     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3237     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
3238   }
3239 
3240   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3241   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3242   B->preallocated = PETSC_TRUE;
3243   PetscFunctionReturn(0);
3244 }
3245 
3246 #undef __FUNCT__
3247 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3248 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3249 {
3250   Mat            mat;
3251   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3252   PetscErrorCode ierr;
3253 
3254   PetscFunctionBegin;
3255   *newmat = 0;
3256   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3257   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3258   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
3259   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3260   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3261   a       = (Mat_MPIAIJ*)mat->data;
3262 
3263   mat->factortype   = matin->factortype;
3264   mat->assembled    = PETSC_TRUE;
3265   mat->insertmode   = NOT_SET_VALUES;
3266   mat->preallocated = PETSC_TRUE;
3267 
3268   a->size         = oldmat->size;
3269   a->rank         = oldmat->rank;
3270   a->donotstash   = oldmat->donotstash;
3271   a->roworiented  = oldmat->roworiented;
3272   a->rowindices   = 0;
3273   a->rowvalues    = 0;
3274   a->getrowactive = PETSC_FALSE;
3275 
3276   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3277   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3278 
3279   if (oldmat->colmap) {
3280 #if defined(PETSC_USE_CTABLE)
3281     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3282 #else
3283     ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr);
3284     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3285     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3286 #endif
3287   } else a->colmap = 0;
3288   if (oldmat->garray) {
3289     PetscInt len;
3290     len  = oldmat->B->cmap->n;
3291     ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr);
3292     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3293     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3294   } else a->garray = 0;
3295 
3296   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3297   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3298   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3299   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3300   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3301   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3302   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3303   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3304   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3305   *newmat = mat;
3306   PetscFunctionReturn(0);
3307 }
3308 
3309 
3310 
3311 #undef __FUNCT__
3312 #define __FUNCT__ "MatLoad_MPIAIJ"
3313 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3314 {
3315   PetscScalar    *vals,*svals;
3316   MPI_Comm       comm;
3317   PetscErrorCode ierr;
3318   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3319   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3320   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3321   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3322   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3323   int            fd;
3324   PetscInt       bs = 1;
3325 
3326   PetscFunctionBegin;
3327   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3328   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3329   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3330   if (!rank) {
3331     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3332     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3333     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
3334   }
3335 
3336   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3337   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3338   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3339 
3340   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3341 
3342   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3343   M    = header[1]; N = header[2];
3344   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3345   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3346   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3347 
3348   /* If global sizes are set, check if they are consistent with that given in the file */
3349   if (sizesset) {
3350     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3351   }
3352   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3353   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3354 
3355   /* determine ownership of all (block) rows */
3356   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3357   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3358   else m = newMat->rmap->n; /* Set by user */
3359 
3360   ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
3361   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3362 
3363   /* First process needs enough room for process with most rows */
3364   if (!rank) {
3365     mmax = rowners[1];
3366     for (i=2; i<=size; i++) {
3367       mmax = PetscMax(mmax, rowners[i]);
3368     }
3369   } else mmax = -1;             /* unused, but compilers complain */
3370 
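  /* prefix-sum the gathered per-process row counts into global row ownership ranges */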
3371   rowners[0] = 0;
3372   for (i=2; i<=size; i++) {
3373     rowners[i] += rowners[i-1];
3374   }
3375   rstart = rowners[rank];
3376   rend   = rowners[rank+1];
3377 
3378   /* distribute row lengths to all processors */
3379   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
3380   if (!rank) {
3381     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3382     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3383     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3384     for (j=0; j<m; j++) {
3385       procsnz[0] += ourlens[j];
3386     }
3387     for (i=1; i<size; i++) {
3388       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3389       /* calculate the number of nonzeros on each processor */
3390       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3391         procsnz[i] += rowlengths[j];
3392       }
3393       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3394     }
3395     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3396   } else {
3397     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3398   }
3399 
3400   if (!rank) {
3401     /* determine max buffer needed and allocate it */
3402     maxnz = 0;
3403     for (i=0; i<size; i++) {
3404       maxnz = PetscMax(maxnz,procsnz[i]);
3405     }
3406     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3407 
3408     /* read in my part of the matrix column indices  */
3409     nz   = procsnz[0];
3410     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3411     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3412 
3413     /* read in everyone else's and ship off */
3414     for (i=1; i<size; i++) {
3415       nz   = procsnz[i];
3416       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3417       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3418     }
3419     ierr = PetscFree(cols);CHKERRQ(ierr);
3420   } else {
3421     /* determine buffer space needed for message */
3422     nz = 0;
3423     for (i=0; i<m; i++) {
3424       nz += ourlens[i];
3425     }
3426     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3427 
3428     /* receive message of column indices*/
3429     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3430   }
3431 
3432   /* determine column ownership if matrix is not square */
3433   if (N != M) {
3434     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3435     else n = newMat->cmap->n;
3436     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3437     cstart = cend - n;
3438   } else {
3439     cstart = rstart;
3440     cend   = rend;
3441     n      = cend - cstart;
3442   }
3443 
3444   /* loop over local rows, determining number of off diagonal entries */
3445   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3446   jj   = 0;
3447   for (i=0; i<m; i++) {
3448     for (j=0; j<ourlens[i]; j++) {
3449       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3450       jj++;
3451     }
3452   }
3453 
3454   for (i=0; i<m; i++) {
3455     ourlens[i] -= offlens[i];
3456   }
3457   if (!sizesset) {
3458     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3459   }
3460 
3461   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3462 
3463   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3464 
3465   for (i=0; i<m; i++) {
3466     ourlens[i] += offlens[i];
3467   }
3468 
3469   if (!rank) {
3470     ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr);
3471 
3472     /* read in my part of the matrix numerical values  */
3473     nz   = procsnz[0];
3474     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3475 
3476     /* insert into matrix */
3477     jj      = rstart;
3478     smycols = mycols;
3479     svals   = vals;
3480     for (i=0; i<m; i++) {
3481       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3482       smycols += ourlens[i];
3483       svals   += ourlens[i];
3484       jj++;
3485     }
3486 
3487     /* read in other processors and ship out */
3488     for (i=1; i<size; i++) {
3489       nz   = procsnz[i];
3490       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3491       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3492     }
3493     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3494   } else {
3495     /* receive numeric values */
3496     ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);
3497 
3498     /* receive message of values */
3499     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3500 
3501     /* insert into matrix */
3502     jj      = rstart;
3503     smycols = mycols;
3504     svals   = vals;
3505     for (i=0; i<m; i++) {
3506       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3507       smycols += ourlens[i];
3508       svals   += ourlens[i];
3509       jj++;
3510     }
3511   }
3512   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3513   ierr = PetscFree(vals);CHKERRQ(ierr);
3514   ierr = PetscFree(mycols);CHKERRQ(ierr);
3515   ierr = PetscFree(rowners);CHKERRQ(ierr);
3516   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3517   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3518   PetscFunctionReturn(0);
3519 }
3520 
3521 #undef __FUNCT__
3522 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3523 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3524 {
3525   PetscErrorCode ierr;
3526   IS             iscol_local;
3527   PetscInt       csize;
3528 
3529   PetscFunctionBegin;
3530   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3531   if (call == MAT_REUSE_MATRIX) {
3532     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3533     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3534   } else {
3535     PetscInt cbs;
3536     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3537     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3538     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3539   }
3540   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3541   if (call == MAT_INITIAL_MATRIX) {
3542     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3543     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3544   }
3545   PetscFunctionReturn(0);
3546 }
3547 
3548 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3549 #undef __FUNCT__
3550 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3551 /*
3552     Not great since it makes two copies of the submatrix: first a SeqAIJ
3553   in local form, and then the end result by concatenating the local matrices.
3554   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3555 
3556   Note: This requires a sequential iscol with all indices.
3557 */
3558 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3559 {
3560   PetscErrorCode ierr;
3561   PetscMPIInt    rank,size;
3562   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3563   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3564   PetscBool      allcolumns, colflag;
3565   Mat            M,Mreuse;
3566   MatScalar      *vwork,*aa;
3567   MPI_Comm       comm;
3568   Mat_SeqAIJ     *aij;
3569 
3570   PetscFunctionBegin;
3571   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3572   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3573   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3574 
3575   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3576   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3577   if (colflag && ncol == mat->cmap->N) {
3578     allcolumns = PETSC_TRUE;
3579   } else {
3580     allcolumns = PETSC_FALSE;
3581   }
3582   if (call ==  MAT_REUSE_MATRIX) {
3583     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3584     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3585     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3586   } else {
3587     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3588   }
3589 
3590   /*
3591       m - number of local rows
3592       n - number of columns (same on all processors)
3593       rstart - first row in new global matrix generated
3594   */
3595   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3596   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3597   if (call == MAT_INITIAL_MATRIX) {
3598     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3599     ii  = aij->i;
3600     jj  = aij->j;
3601 
3602     /*
3603         Determine the number of non-zeros in the diagonal and off-diagonal
3604         portions of the matrix in order to do correct preallocation
3605     */
3606 
3607     /* first get start and end of "diagonal" columns */
3608     if (csize == PETSC_DECIDE) {
3609       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3610       if (mglobal == n) { /* square matrix */
3611         nlocal = m;
3612       } else {
3613         nlocal = n/size + ((n % size) > rank);
3614       }
3615     } else {
3616       nlocal = csize;
3617     }
3618     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3619     rstart = rend - nlocal;
3620     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3621 
3622     /* next, compute all the lengths */
3623     ierr  = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr);
3624     olens = dlens + m;
3625     for (i=0; i<m; i++) {
3626       jend = ii[i+1] - ii[i];
3627       olen = 0;
3628       dlen = 0;
3629       for (j=0; j<jend; j++) {
3630         if (*jj < rstart || *jj >= rend) olen++;
3631         else dlen++;
3632         jj++;
3633       }
3634       olens[i] = olen;
3635       dlens[i] = dlen;
3636     }
3637     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3638     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3639     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3640     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3641     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3642     ierr = PetscFree(dlens);CHKERRQ(ierr);
3643   } else {
3644     PetscInt ml,nl;
3645 
3646     M    = *newmat;
3647     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3648     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3649     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3650     /*
3651          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3652        rather than the slower MatSetValues().
3653     */
3654     M->was_assembled = PETSC_TRUE;
3655     M->assembled     = PETSC_FALSE;
3656   }
3657   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3658   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3659   ii   = aij->i;
3660   jj   = aij->j;
3661   aa   = aij->a;
3662   for (i=0; i<m; i++) {
3663     row   = rstart + i;
3664     nz    = ii[i+1] - ii[i];
3665     cwork = jj;     jj += nz;
3666     vwork = aa;     aa += nz;
3667     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3668   }
3669 
3670   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3671   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3672   *newmat = M;
3673 
3674   /* save submatrix used in processor for next request */
3675   if (call ==  MAT_INITIAL_MATRIX) {
3676     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3677     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3678   }
3679   PetscFunctionReturn(0);
3680 }
3681 
3682 #undef __FUNCT__
3683 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3684 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3685 {
3686   PetscInt       m,cstart, cend,j,nnz,i,d;
3687   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3688   const PetscInt *JJ;
3689   PetscScalar    *values;
3690   PetscErrorCode ierr;
3691 
3692   PetscFunctionBegin;
3693   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3694 
3695   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3696   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3697   m      = B->rmap->n;
3698   cstart = B->cmap->rstart;
3699   cend   = B->cmap->rend;
3700   rstart = B->rmap->rstart;
3701 
3702   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3703 
3704 #if defined(PETSC_USE_DEBUG)
3705   for (i=0; i<m; i++) {
3706     nnz = Ii[i+1]- Ii[i];
3707     JJ  = J + Ii[i];
3708     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3709     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3710     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3711   }
3712 #endif
3713 
3714   for (i=0; i<m; i++) {
3715     nnz     = Ii[i+1]- Ii[i];
3716     JJ      = J + Ii[i];
3717     nnz_max = PetscMax(nnz_max,nnz);
3718     d       = 0;
3719     for (j=0; j<nnz; j++) {
3720       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3721     }
3722     d_nnz[i] = d;
3723     o_nnz[i] = nnz - d;
3724   }
3725   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3726   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3727 
3728   if (v) values = (PetscScalar*)v;
3729   else {
3730     ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
3731   }
3732 
3733   for (i=0; i<m; i++) {
3734     ii   = i + rstart;
3735     nnz  = Ii[i+1]- Ii[i];
3736     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3737   }
3738   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3739   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3740 
3741   if (!v) {
3742     ierr = PetscFree(values);CHKERRQ(ierr);
3743   }
3744   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3745   PetscFunctionReturn(0);
3746 }
3747 
3748 #undef __FUNCT__
3749 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3750 /*@
3751    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3752    (the default parallel PETSc format).
3753 
3754    Collective on MPI_Comm
3755 
3756    Input Parameters:
3757 +  B - the matrix
3758 .  i - the indices into j for the start of each local row (starts with zero)
3759 .  j - the column indices for each local row (starts with zero)
3760 -  v - optional values in the matrix
3761 
3762    Level: developer
3763 
3764    Notes:
3765        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3766      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3767      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3768 
3769        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3770 
3771        The format which is used for the sparse matrix input is equivalent to a
3772     row-major ordering, i.e. for the following matrix, the input data expected is
3773     as shown:
3774 
3775         1 0 0
3776         2 0 3     P0
3777        -------
3778         4 5 6     P1
3779 
3780      Process0 [P0]: rows_owned=[0,1]
3781         i =  {0,1,3}  [size = nrow+1 = 2+1]
3782         j =  {0,0,2}  [size = nz = 3]
3783         v =  {1,2,3}  [size = nz = 3]
3784 
3785      Process1 [P1]: rows_owned=[2]
3786         i =  {0,3}    [size = nrow+1 = 1+1]
3787         j =  {0,1,2}  [size = nz = 3]
3788         v =  {4,5,6}  [size = nz = 3]
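
   As a sketch (not from the source), each process passes its own local arrays;
   e.g. process 0 above, assuming B already has its sizes and type set:

.vb
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar v[] = {1,2,3};
     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve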
3789 
3790 .keywords: matrix, aij, compressed row, sparse, parallel
3791 
3792 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3793           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3794 @*/
3795 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3796 {
3797   PetscErrorCode ierr;
3798 
3799   PetscFunctionBegin;
3800   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3801   PetscFunctionReturn(0);
3802 }
3803 
3804 #undef __FUNCT__
3805 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3806 /*@C
3807    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3808    (the default parallel PETSc format).  For good matrix assembly performance
3809    the user should preallocate the matrix storage by setting the parameters
3810    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3811    performance can be increased by more than a factor of 50.
3812 
3813    Collective on MPI_Comm
3814 
3815    Input Parameters:
3816 +  B - the matrix
3817 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3818            (same value is used for all local rows)
3819 .  d_nnz - array containing the number of nonzeros in the various rows of the
3820            DIAGONAL portion of the local submatrix (possibly different for each row)
3821            or NULL, if d_nz is used to specify the nonzero structure.
3822            The size of this array is equal to the number of local rows, i.e. 'm'.
3823            For matrices that will be factored, you must leave room for (and set)
3824            the diagonal entry even if it is zero.
3825 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3826            submatrix (same value is used for all local rows).
3827 -  o_nnz - array containing the number of nonzeros in the various rows of the
3828            OFF-DIAGONAL portion of the local submatrix (possibly different for
3829            each row) or NULL, if o_nz is used to specify the nonzero
3830            structure. The size of this array is equal to the number
3831            of local rows, i.e. 'm'.
3832 
3833    If the *_nnz parameter is given then the *_nz parameter is ignored
3834 
3835    The AIJ format (also called the Yale sparse matrix format or
3836    compressed row storage (CSR)) is fully compatible with standard Fortran 77
3837    storage.  The stored row and column indices begin with zero.
3838    See Users-Manual: ch_mat for details.
3839 
3840    The parallel matrix is partitioned such that the first m0 rows belong to
3841    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3842    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
3843 
3844    The DIAGONAL portion of the local submatrix of a processor can be defined
3845    as the submatrix obtained by extracting the part corresponding to
3846    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3847    first row that belongs to the processor, r2 is the last row belonging to
3848    this processor, and c1-c2 is the range of indices of the local part of a
3849    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3850    common case of a square matrix, the row and column ranges are the same and
3851    the DIAGONAL part is also square. The remaining portion of the local
3852    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3853 
3854    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3855    If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
3856    You can call MatGetInfo() to get information on how effective the preallocation was,
3857    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3858    You can also run with the option -info and look for messages with the string
3859    malloc in them to see if additional memory allocation was needed.
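
       For instance, a small sketch of querying these fields after assembly
    (error checking omitted):

.vb
      MatInfo info;
      MatGetInfo(B,MAT_LOCAL,&info);
      PetscPrintf(PETSC_COMM_SELF,"mallocs %g, allocated %g, used %g, unneeded %g\n",
                  info.mallocs,info.nz_allocated,info.nz_used,info.nz_unneeded);
.ve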
3860 
3861    Example usage:
3862 
3863    Consider the following 8x8 matrix with 34 non-zero values, which is
3864    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3865    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
3866    as follows:
3867 
3868 .vb
3869             1  2  0  |  0  3  0  |  0  4
3870     Proc0   0  5  6  |  7  0  0  |  8  0
3871             9  0 10  | 11  0  0  | 12  0
3872     -------------------------------------
3873            13  0 14  | 15 16 17  |  0  0
3874     Proc1   0 18  0  | 19 20 21  |  0  0
3875             0  0  0  | 22 23  0  | 24  0
3876     -------------------------------------
3877     Proc2  25 26 27  |  0  0 28  | 29  0
3878            30  0  0  | 31 32 33  |  0 34
3879 .ve
3880 
3881    This can be represented as a collection of submatrices as:
3882 
3883 .vb
3884       A B C
3885       D E F
3886       G H I
3887 .ve
3888 
3889    The submatrices A,B,C are owned by proc0, D,E,F are
3890    owned by proc1, and G,H,I are owned by proc2.
3891 
3892    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3893    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3894    The 'M','N' parameters are 8,8, and have the same values on all procs.
3895 
3896    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3897    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3898    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3899    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
3900    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3901    matrix, and [DF] as another SeqAIJ matrix.
3902 
3903    When d_nz, o_nz parameters are specified, d_nz storage elements are
3904    allocated for every row of the local diagonal submatrix, and o_nz
3905    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3906    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3907    the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3908    In this case, the values of d_nz,o_nz are:
3909 .vb
3910      proc0 : d_nz = 2, o_nz = 2
3911      proc1 : d_nz = 3, o_nz = 2
3912      proc2 : d_nz = 1, o_nz = 4
3913 .ve
3914    We are allocating m*(d_nz+o_nz) storage locations on every process. This
3915    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
3916    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3917    34 values.
3918 
3919    When d_nnz, o_nnz parameters are specified, the storage is specified
3920    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3921    In the above case the values for d_nnz,o_nnz are:
3922 .vb
3923      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3924      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3925      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3926 .ve
3927    Here the space allocated is the sum of all the above values, i.e., 34, and
3928    hence the preallocation is exact.
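
       A sketch of proc1's calls for this example, assuming B was created with
    MatCreate() on a three-process communicator comm (error checking omitted):

.vb
      PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
      MatSetSizes(B,3,3,8,8);
      MatSetType(B,MATMPIAIJ);
      MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve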
3929 
3930    Level: intermediate
3931 
3932 .keywords: matrix, aij, compressed row, sparse, parallel
3933 
3934 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3935           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3936 @*/
3937 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3938 {
3939   PetscErrorCode ierr;
3940 
3941   PetscFunctionBegin;
3942   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3943   PetscValidType(B,1);
3944   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3945   PetscFunctionReturn(0);
3946 }
3947 
3948 #undef __FUNCT__
3949 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3950 /*@
3951      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3952          CSR format the local rows.
3953 
3954    Collective on MPI_Comm
3955 
3956    Input Parameters:
3957 +  comm - MPI communicator
3958 .  m - number of local rows (Cannot be PETSC_DECIDE)
3959 .  n - number of local columns; should be the same as the local size used in
3960        creating the x vector for the product y = Ax (or PETSC_DECIDE to have it
3961        calculated if N is given). For square matrices n is almost always m.
3962 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3963 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3964 .   i - row indices
3965 .   j - column indices
3966 -   a - matrix values
3967 
3968    Output Parameter:
3969 .   mat - the matrix
3970 
3971    Level: intermediate
3972 
3973    Notes:
3974        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3975      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3976      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3977 
3978        The i and j indices are 0 based, and the i array holds offsets into the local j array.
3979 
3980        The format used for the sparse matrix input is equivalent to a
3981     row-major ordering, i.e., for the following matrix the expected input data is
3982     as shown:
3983 
3984         1 0 0
3985         2 0 3     P0
3986        -------
3987         4 5 6     P1
3988 
3989      Process0 [P0]: rows_owned=[0,1]
3990         i =  {0,1,3}  [size = nrow+1  = 2+1]
3991         j =  {0,0,2}  [size = nz = 3]
3992         v =  {1,2,3}  [size = nz = 3]
3993 
3994      Process1 [P1]: rows_owned=[2]
3995         i =  {0,3}    [size = nrow+1  = 1+1]
3996         j =  {0,1,2}  [size = nz = 3]
3997         v =  {4,5,6}  [size = nz = 3]
3998 
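       A minimal sketch of process 0's call for the example above (comm contains
    both processes; error checking omitted):

.vb
      PetscInt    i[] = {0,1,3};
      PetscInt    j[] = {0,0,2};
      PetscScalar v[] = {1.0,2.0,3.0};
      Mat         A;
      MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,v,&A);
.ve
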
3999 .keywords: matrix, aij, compressed row, sparse, parallel
4000 
4001 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4002           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4003 @*/
4004 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4005 {
4006   PetscErrorCode ierr;
4007 
4008   PetscFunctionBegin;
4009   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4010   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4011   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4012   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4013   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4014   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4015   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4016   PetscFunctionReturn(0);
4017 }
4018 
4019 #undef __FUNCT__
4020 #define __FUNCT__ "MatCreateAIJ"
4021 /*@C
4022    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4023    (the default parallel PETSc format).  For good matrix assembly performance
4024    the user should preallocate the matrix storage by setting the parameters
4025    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4026    performance can be increased by more than a factor of 50.
4027 
4028    Collective on MPI_Comm
4029 
4030    Input Parameters:
4031 +  comm - MPI communicator
4032 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4033            This value should be the same as the local size used in creating the
4034            y vector for the matrix-vector product y = Ax.
4035 .  n - number of local columns; should be the same as the local size used in
4036        creating the x vector for the product y = Ax (or PETSC_DECIDE to have it
4037        calculated if N is given). For square matrices n is almost always m.
4038 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4039 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4040 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4041            (same value is used for all local rows)
4042 .  d_nnz - array containing the number of nonzeros in the various rows of the
4043            DIAGONAL portion of the local submatrix (possibly different for each row)
4044            or NULL, if d_nz is used to specify the nonzero structure.
4045            The size of this array is equal to the number of local rows, i.e., 'm'.
4046 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4047            submatrix (same value is used for all local rows).
4048 -  o_nnz - array containing the number of nonzeros in the various rows of the
4049            OFF-DIAGONAL portion of the local submatrix (possibly different for
4050            each row) or NULL, if o_nz is used to specify the nonzero
4051            structure. The size of this array is equal to the number
4052            of local rows, i.e., 'm'.
4053 
4054    Output Parameter:
4055 .  A - the matrix
4056 
4057    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4058    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4059    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4060 
4061    Notes:
4062    If the *_nnz parameter is given then the *_nz parameter is ignored
4063 
4064    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4065    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4066    storage requirements for this matrix.
4067 
4068    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4069    processor then it must be used on all processors that share the object for
4070    that argument.
4071 
4072    The user MUST specify either the local or global matrix dimensions
4073    (possibly both).
4074 
4075    The parallel matrix is partitioned across processors such that the
4076    first m0 rows belong to process 0, the next m1 rows belong to
4077    process 1, the next m2 rows belong to process 2, etc., where
4078    m0,m1,m2,... are given by the input parameter 'm', i.e., each processor stores
4079    values corresponding to an [m x N] submatrix.
4080 
4081    The columns are logically partitioned with the n0 columns belonging
4082    to the 0th partition, the next n1 columns belonging to the next
4083    partition, etc., where n0,n1,n2,... are given by the input parameter 'n'.
4084 
4085    The DIAGONAL portion of the local submatrix on any given processor
4086    is the submatrix corresponding to the rows m and columns n
4087    owned by that processor, i.e., the diagonal matrix on
4088    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4089    etc. The remaining portion of the local submatrix [m x (N-n)]
4090    constitutes the OFF-DIAGONAL portion. The example below
4091    illustrates this concept.
4092 
4093    For a square global matrix we define each processor's diagonal portion
4094    to be its local rows and the corresponding columns (a square submatrix);
4095    each processor's off-diagonal portion encompasses the remainder of the
4096    local matrix (a rectangular submatrix).
4097 
4098    If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
4099 
4100    When calling this routine with a single process communicator, a matrix of
4101    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4102    type of communicator, use the construction mechanism:
4103      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4104 
4105    By default, this format uses inodes (identical nodes) when possible.
4106    We search for consecutive rows with the same nonzero structure, thereby
4107    reusing matrix information to achieve increased efficiency.
4108 
4109    Options Database Keys:
4110 +  -mat_no_inode  - Do not use inodes
4111 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4112 -  -mat_aij_oneindex - Internally use indexing starting at 1
4113         rather than 0.  Note that when calling MatSetValues(),
4114         the user still MUST index entries starting at 0!
4115 
4116 
4117    Example usage:
4118 
4119    Consider the following 8x8 matrix with 34 non-zero values, which is
4120    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4121    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4122    as follows:
4123 
4124 .vb
4125             1  2  0  |  0  3  0  |  0  4
4126     Proc0   0  5  6  |  7  0  0  |  8  0
4127             9  0 10  | 11  0  0  | 12  0
4128     -------------------------------------
4129            13  0 14  | 15 16 17  |  0  0
4130     Proc1   0 18  0  | 19 20 21  |  0  0
4131             0  0  0  | 22 23  0  | 24  0
4132     -------------------------------------
4133     Proc2  25 26 27  |  0  0 28  | 29  0
4134            30  0  0  | 31 32 33  |  0 34
4135 .ve
4136 
4137    This can be represented as a collection of submatrices as:
4138 
4139 .vb
4140       A B C
4141       D E F
4142       G H I
4143 .ve
4144 
4145    The submatrices A,B,C are owned by proc0, D,E,F are
4146    owned by proc1, and G,H,I are owned by proc2.
4147 
4148    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4149    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4150    The 'M','N' parameters are 8,8, and have the same values on all procs.
4151 
4152    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4153    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4154    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4155    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4156    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4157    matrix, and [DF] as another SeqAIJ matrix.
4158 
4159    When d_nz, o_nz parameters are specified, d_nz storage elements are
4160    allocated for every row of the local diagonal submatrix, and o_nz
4161    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4162    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4163    the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4164    In this case, the values of d_nz,o_nz are:
4165 .vb
4166      proc0 : d_nz = 2, o_nz = 2
4167      proc1 : d_nz = 3, o_nz = 2
4168      proc2 : d_nz = 1, o_nz = 4
4169 .ve
4170    We are allocating m*(d_nz+o_nz) storage locations on every process. This
4171    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4172    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4173    34 values.
4174 
4175    When d_nnz, o_nnz parameters are specified, the storage is specified
4176    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4177    In the above case the values for d_nnz,o_nnz are:
4178 .vb
4179      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4180      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4181      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4182 .ve
4183    Here the space allocated is the sum of all the above values, i.e., 34, and
4184    hence the preallocation is exact.
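
       A sketch of proc0's single call for this example (error checking omitted):

.vb
      PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
      Mat      A;
      MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve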
4185 
4186    Level: intermediate
4187 
4188 .keywords: matrix, aij, compressed row, sparse, parallel
4189 
4190 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4191           MPIAIJ, MatCreateMPIAIJWithArrays()
4192 @*/
4193 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4194 {
4195   PetscErrorCode ierr;
4196   PetscMPIInt    size;
4197 
4198   PetscFunctionBegin;
4199   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4200   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4201   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4202   if (size > 1) {
4203     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4204     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4205   } else {
4206     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4207     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4208   }
4209   PetscFunctionReturn(0);
4210 }
4211 
4212 #undef __FUNCT__
4213 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4214 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4215 {
4216   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4217 
4218   PetscFunctionBegin;
4219   if (Ad)     *Ad     = a->A;
4220   if (Ao)     *Ao     = a->B;
4221   if (colmap) *colmap = a->garray;
4222   PetscFunctionReturn(0);
4223 }
4224 
4225 #undef __FUNCT__
4226 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4227 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4228 {
4229   PetscErrorCode ierr;
4230   PetscInt       i;
4231   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4232 
4233   PetscFunctionBegin;
4234   if (coloring->ctype == IS_COLORING_GLOBAL) {
4235     ISColoringValue *allcolors,*colors;
4236     ISColoring      ocoloring;
4237 
4238     /* set coloring for diagonal portion */
4239     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4240 
4241     /* set coloring for off-diagonal portion */
4242     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4243     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4244     for (i=0; i<a->B->cmap->n; i++) {
4245       colors[i] = allcolors[a->garray[i]];
4246     }
4247     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4248     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4249     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4250     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4251   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4252     ISColoringValue *colors;
4253     PetscInt        *larray;
4254     ISColoring      ocoloring;
4255 
4256     /* set coloring for diagonal portion */
4257     ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr);
4258     for (i=0; i<a->A->cmap->n; i++) {
4259       larray[i] = i + A->cmap->rstart;
4260     }
4261     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4262     ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr);
4263     for (i=0; i<a->A->cmap->n; i++) {
4264       colors[i] = coloring->colors[larray[i]];
4265     }
4266     ierr = PetscFree(larray);CHKERRQ(ierr);
4267     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4268     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4269     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4270 
4271     /* set coloring for off-diagonal portion */
4272     ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr);
4273     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4274     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4275     for (i=0; i<a->B->cmap->n; i++) {
4276       colors[i] = coloring->colors[larray[i]];
4277     }
4278     ierr = PetscFree(larray);CHKERRQ(ierr);
4279     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4280     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4281     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4282   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4283   PetscFunctionReturn(0);
4284 }
4285 
4286 #undef __FUNCT__
4287 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4288 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4289 {
4290   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4291   PetscErrorCode ierr;
4292 
4293   PetscFunctionBegin;
4294   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4295   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4296   PetscFunctionReturn(0);
4297 }
4298 
4299 #undef __FUNCT__
4300 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4301 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4302 {
4303   PetscErrorCode ierr;
4304   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4305   PetscInt       *indx;
4306 
4307   PetscFunctionBegin;
4308   /* This routine will ONLY return MPIAIJ type matrix */
4309   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4310   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4311   if (n == PETSC_DECIDE) {
4312     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4313   }
4314   /* Check sum(n) = N */
4315   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4316   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4317 
4318   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4319   rstart -= m;
4320 
4321   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4322   for (i=0; i<m; i++) {
4323     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4324     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4325     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4326   }
4327 
4328   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4329   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4330   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4331   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4332   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4333   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4334   PetscFunctionReturn(0);
4335 }
4336 
4337 #undef __FUNCT__
4338 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4339 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4340 {
4341   PetscErrorCode ierr;
4342   PetscInt       m,N,i,rstart,nnz,Ii;
4343   PetscInt       *indx;
4344   PetscScalar    *values;
4345 
4346   PetscFunctionBegin;
4347   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4348   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4349   for (i=0; i<m; i++) {
4350     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4351     Ii   = i + rstart;
4352     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4353     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4354   }
4355   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4356   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4357   PetscFunctionReturn(0);
4358 }
4359 
4360 #undef __FUNCT__
4361 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4362 /*@
4363       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4364                  matrices from each processor
4365 
4366     Collective on MPI_Comm
4367 
4368    Input Parameters:
4369 +    comm - the communicator the parallel matrix will live on
4370 .    inmat - the input sequential matrix (one per process)
4371 .    n - number of local columns (or PETSC_DECIDE)
4372 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4373 
4374    Output Parameter:
4375 .    outmat - the parallel matrix generated
4376 
4377     Level: advanced
4378 
4379    Notes: The number of columns of the matrix in EACH processor MUST be the same.
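
       A minimal sketch of the intended use, where each process first assembles its
    own block of rows as a SeqAIJ matrix (mlocal, N, and nnz are illustrative names,
    not arguments defined above; error checking omitted):

.vb
      Mat seqA,parA;
      MatCreateSeqAIJ(PETSC_COMM_SELF,mlocal,N,0,nnz,&seqA);
      /* ... MatSetValues(), MatAssemblyBegin/End() on seqA ... */
      MatCreateMPIAIJConcatenateSeqAIJ(comm,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&parA);
.ve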
4380 
4381 @*/
4382 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4383 {
4384   PetscErrorCode ierr;
4385   PetscMPIInt    size;
4386 
4387   PetscFunctionBegin;
4388   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4389   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4390   if (size == 1) {
4391     if (scall == MAT_INITIAL_MATRIX) {
4392       ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr);
4393     } else {
4394       ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4395     }
4396   } else {
4397     if (scall == MAT_INITIAL_MATRIX) {
4398       ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4399     }
4400     ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4401   }
4402   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4403   PetscFunctionReturn(0);
4404 }
4405 
4406 #undef __FUNCT__
4407 #define __FUNCT__ "MatFileSplit"
4408 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4409 {
4410   PetscErrorCode    ierr;
4411   PetscMPIInt       rank;
4412   PetscInt          m,N,i,rstart,nnz;
4413   size_t            len;
4414   const PetscInt    *indx;
4415   PetscViewer       out;
4416   char              *name;
4417   Mat               B;
4418   const PetscScalar *values;
4419 
4420   PetscFunctionBegin;
4421   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4422   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4423   /* Should this be the type of the diagonal block of A? */
4424   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4425   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4426   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4427   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4428   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4429   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4430   for (i=0; i<m; i++) {
4431     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4432     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4433     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4434   }
4435   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4436   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4437 
4438   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4439   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4440   ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr);
4441   sprintf(name,"%s.%d",outfile,rank);
4442   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4443   ierr = PetscFree(name);CHKERRQ(ierr);
4444   ierr = MatView(B,out);CHKERRQ(ierr);
4445   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4446   ierr = MatDestroy(&B);CHKERRQ(ierr);
4447   PetscFunctionReturn(0);
4448 }
4449 
4450 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4451 #undef __FUNCT__
4452 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4453 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4454 {
4455   PetscErrorCode      ierr;
4456   Mat_Merge_SeqsToMPI *merge;
4457   PetscContainer      container;
4458 
4459   PetscFunctionBegin;
4460   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4461   if (container) {
4462     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4463     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4464     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4465     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4466     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4467     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4468     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4469     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4470     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4471     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4472     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4473     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4474     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4475     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4476     ierr = PetscFree(merge);CHKERRQ(ierr);
4477     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4478   }
4479   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4480   PetscFunctionReturn(0);
4481 }
4482 
4483 #include <../src/mat/utils/freespace.h>
4484 #include <petscbt.h>
4485 
4486 #undef __FUNCT__
4487 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4488 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4489 {
4490   PetscErrorCode      ierr;
4491   MPI_Comm            comm;
4492   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4493   PetscMPIInt         size,rank,taga,*len_s;
4494   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4495   PetscInt            proc,m;
4496   PetscInt            **buf_ri,**buf_rj;
4497   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4498   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4499   MPI_Request         *s_waits,*r_waits;
4500   MPI_Status          *status;
4501   MatScalar           *aa=a->a;
4502   MatScalar           **abuf_r,*ba_i;
4503   Mat_Merge_SeqsToMPI *merge;
4504   PetscContainer      container;
4505 
4506   PetscFunctionBegin;
4507   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4508   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4509 
4510   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4511   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4512 
4513   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4514   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4515 
4516   bi     = merge->bi;
4517   bj     = merge->bj;
4518   buf_ri = merge->buf_ri;
4519   buf_rj = merge->buf_rj;
4520 
4521   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4522   owners = merge->rowmap->range;
4523   len_s  = merge->len_s;
4524 
4525   /* send and recv matrix values */
4526   /*-----------------------------*/
4527   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4528   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4529 
4530   ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr);
4531   for (proc=0,k=0; proc<size; proc++) {
4532     if (!len_s[proc]) continue;
4533     i    = owners[proc];
4534     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4535     k++;
4536   }
4537 
4538   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4539   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4540   ierr = PetscFree(status);CHKERRQ(ierr);
4541 
4542   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4543   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4544 
4545   /* insert mat values of mpimat */
4546   /*----------------------------*/
4547   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4548   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4549 
4550   for (k=0; k<merge->nrecv; k++) {
4551     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4552     nrows       = *(buf_ri_k[k]);
4553     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4554     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4555   }
4556 
4557   /* set values of ba */
4558   m = merge->rowmap->n;
4559   for (i=0; i<m; i++) {
4560     arow = owners[rank] + i;
4561     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4562     bnzi = bi[i+1] - bi[i];
4563     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4564 
4565     /* add local non-zero vals of this proc's seqmat into ba */
4566     anzi   = ai[arow+1] - ai[arow];
4567     aj     = a->j + ai[arow];
4568     aa     = a->a + ai[arow];
4569     nextaj = 0;
4570     for (j=0; nextaj<anzi; j++) {
4571       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4572         ba_i[j] += aa[nextaj++];
4573       }
4574     }
4575 
4576     /* add received vals into ba */
4577     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4578       /* i-th row */
4579       if (i == *nextrow[k]) {
4580         anzi   = *(nextai[k]+1) - *nextai[k];
4581         aj     = buf_rj[k] + *(nextai[k]);
4582         aa     = abuf_r[k] + *(nextai[k]);
4583         nextaj = 0;
4584         for (j=0; nextaj<anzi; j++) {
4585           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4586             ba_i[j] += aa[nextaj++];
4587           }
4588         }
4589         nextrow[k]++; nextai[k]++;
4590       }
4591     }
4592     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4593   }
4594   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4595   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4596 
4597   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4598   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4599   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4600   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4601   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4602   PetscFunctionReturn(0);
4603 }
4604 
4605 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4606 
4607 #undef __FUNCT__
4608 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4609 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4610 {
4611   PetscErrorCode      ierr;
4612   Mat                 B_mpi;
4613   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4614   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4615   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4616   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4617   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4618   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4619   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4620   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4621   MPI_Status          *status;
4622   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4623   PetscBT             lnkbt;
4624   Mat_Merge_SeqsToMPI *merge;
4625   PetscContainer      container;
4626 
4627   PetscFunctionBegin;
4628   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4629 
4630   /* make sure it is a PETSc comm */
4631   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4632   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4633   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4634 
4635   ierr = PetscNew(&merge);CHKERRQ(ierr);
4636   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4637 
4638   /* determine row ownership */
4639   /*---------------------------------------------------------*/
4640   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4641   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4642   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4643   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4644   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4645   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4646   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4647 
4648   m      = merge->rowmap->n;
4649   owners = merge->rowmap->range;
4650 
4651   /* determine the number of messages to send, their lengths */
4652   /*---------------------------------------------------------*/
4653   len_s = merge->len_s;
4654 
4655   len          = 0; /* length of buf_si[] */
4656   merge->nsend = 0;
4657   for (proc=0; proc<size; proc++) {
4658     len_si[proc] = 0;
4659     if (proc == rank) {
4660       len_s[proc] = 0;
4661     } else {
4662       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4663       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4664     }
4665     if (len_s[proc]) {
4666       merge->nsend++;
4667       nrows = 0;
4668       for (i=owners[proc]; i<owners[proc+1]; i++) {
4669         if (ai[i+1] > ai[i]) nrows++;
4670       }
4671       len_si[proc] = 2*(nrows+1);
4672       len         += len_si[proc];
4673     }
4674   }
4675 
4676   /* determine the number and length of messages to receive for ij-structure */
4677   /*-------------------------------------------------------------------------*/
4678   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4679   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4680 
4681   /* post the Irecv of j-structure */
4682   /*-------------------------------*/
4683   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4684   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4685 
4686   /* post the Isend of j-structure */
4687   /*--------------------------------*/
4688   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4689 
4690   for (proc=0, k=0; proc<size; proc++) {
4691     if (!len_s[proc]) continue;
4692     i    = owners[proc];
4693     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4694     k++;
4695   }
4696 
4697   /* receives and sends of j-structure are complete */
4698   /*------------------------------------------------*/
4699   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4700   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4701 
4702   /* send and recv i-structure */
4703   /*---------------------------*/
4704   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4705   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4706 
4707   ierr   = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr);
4708   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4709   for (proc=0,k=0; proc<size; proc++) {
4710     if (!len_s[proc]) continue;
4711     /* form outgoing message for i-structure:
4712          buf_si[0]:                 nrows to be sent
4713                [1:nrows]:           row index (global)
4714                [nrows+1:2*nrows+1]: i-structure index
4715     */
4716     /*-------------------------------------------*/
4717     nrows       = len_si[proc]/2 - 1;
4718     buf_si_i    = buf_si + nrows+1;
4719     buf_si[0]   = nrows;
4720     buf_si_i[0] = 0;
4721     nrows       = 0;
4722     for (i=owners[proc]; i<owners[proc+1]; i++) {
4723       anzi = ai[i+1] - ai[i];
4724       if (anzi) {
4725         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4726         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4727         nrows++;
4728       }
4729     }
4730     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4731     k++;
4732     buf_si += len_si[proc];
4733   }
4734 
4735   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4736   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4737 
4738   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4739   for (i=0; i<merge->nrecv; i++) {
4740     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4741   }
4742 
4743   ierr = PetscFree(len_si);CHKERRQ(ierr);
4744   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4745   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4746   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4747   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4748   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4749   ierr = PetscFree(status);CHKERRQ(ierr);
4750 
4751   /* compute a local seq matrix in each processor */
4752   /*----------------------------------------------*/
4753   /* allocate bi array and free space for accumulating nonzero column info */
4754   ierr  = PetscMalloc1((m+1),&bi);CHKERRQ(ierr);
4755   bi[0] = 0;
4756 
4757   /* create and initialize a linked list */
4758   nlnk = N+1;
4759   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4760 
4761   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4762   len  = ai[owners[rank+1]] - ai[owners[rank]];
4763   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4764 
4765   current_space = free_space;
4766 
4767   /* determine symbolic info for each local row */
4768   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4769 
4770   for (k=0; k<merge->nrecv; k++) {
4771     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4772     nrows       = *buf_ri_k[k];
4773     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4774     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4775   }
4776 
4777   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4778   len  = 0;
4779   for (i=0; i<m; i++) {
4780     bnzi = 0;
4781     /* add local non-zero cols of this proc's seqmat into lnk */
4782     arow  = owners[rank] + i;
4783     anzi  = ai[arow+1] - ai[arow];
4784     aj    = a->j + ai[arow];
4785     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4786     bnzi += nlnk;
4787     /* add received col data into lnk */
4788     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4789       if (i == *nextrow[k]) { /* i-th row */
4790         anzi  = *(nextai[k]+1) - *nextai[k];
4791         aj    = buf_rj[k] + *nextai[k];
4792         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4793         bnzi += nlnk;
4794         nextrow[k]++; nextai[k]++;
4795       }
4796     }
4797     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4798 
4799     /* if free space is not available, make more free space */
4800     if (current_space->local_remaining<bnzi) {
4801       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4802       nspacedouble++;
4803     }
4804     /* copy data into free space, then initialize lnk */
4805     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4806     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4807 
4808     current_space->array           += bnzi;
4809     current_space->local_used      += bnzi;
4810     current_space->local_remaining -= bnzi;
4811 
4812     bi[i+1] = bi[i] + bnzi;
4813   }
4814 
4815   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4816 
4817   ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
4818   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4819   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4820 
4821   /* create symbolic parallel matrix B_mpi */
4822   /*---------------------------------------*/
4823   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4824   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4825   if (n==PETSC_DECIDE) {
4826     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4827   } else {
4828     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4829   }
4830   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4831   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4832   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4833   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4834   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4835 
4836   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4837   B_mpi->assembled    = PETSC_FALSE;
4838   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4839   merge->bi           = bi;
4840   merge->bj           = bj;
4841   merge->buf_ri       = buf_ri;
4842   merge->buf_rj       = buf_rj;
4843   merge->coi          = NULL;
4844   merge->coj          = NULL;
4845   merge->owners_co    = NULL;
4846 
4847   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4848 
4849   /* attach the supporting struct to B_mpi for reuse */
4850   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4851   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4852   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4853   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4854   *mpimat = B_mpi;
4855 
4856   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4857   PetscFunctionReturn(0);
4858 }
4859 
4860 #undef __FUNCT__
4861 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4862 /*@C
4863       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4864                  matrices from each processor
4865 
4866     Collective on MPI_Comm
4867 
4868    Input Parameters:
4869 +    comm - the communicator the parallel matrix will live on
4870 .    seqmat - the input sequential matrix (one per process)
4871 .    m - number of local rows (or PETSC_DECIDE)
4872 .    n - number of local columns (or PETSC_DECIDE)
4873 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4874 
4875    Output Parameter:
4876 .    mpimat - the parallel matrix generated
4877 
4878     Level: advanced
4879 
4880    Notes:
4881      The dimensions of the sequential matrix in each processor MUST be the same.
4882      The input seqmat is kept in the container "Mat_Merge_SeqsToMPI" and will be
4883      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
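
       A minimal sketch (seqA is an illustrative name for each process's full-sized
    contribution; error checking omitted):

.vb
      Mat seqA,parA;
      /* ... assemble the M x N contribution seqA on every process ... */
      MatCreateMPIAIJSumSeqAIJ(comm,seqA,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&parA);
      /* reuse after seqA's values change with the same nonzero pattern */
      MatCreateMPIAIJSumSeqAIJ(comm,seqA,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&parA);
.ve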
4884 @*/
4885 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4886 {
4887   PetscErrorCode ierr;
4888   PetscMPIInt    size;
4889 
4890   PetscFunctionBegin;
4891   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4892   if (size == 1) {
4893     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4894     if (scall == MAT_INITIAL_MATRIX) {
4895       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4896     } else {
4897       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4898     }
4899     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4900     PetscFunctionReturn(0);
4901   }
4902   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4903   if (scall == MAT_INITIAL_MATRIX) {
4904     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4905   }
4906   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4907   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4908   PetscFunctionReturn(0);
4909 }
4910 
4911 #undef __FUNCT__
4912 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4913 /*@
4914      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4915           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4916           with MatGetSize()
4917 
4918     Not Collective
4919 
4920    Input Parameters:
4921 +    A - the matrix
4922 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4923 
4924    Output Parameter:
4925 .    A_loc - the local sequential matrix generated
4926 
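       A minimal sketch of the create/reuse pattern (error checking omitted):

.vb
      Mat Aloc;
      MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);
      /* ... A's numerical values change, same nonzero pattern ... */
      MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);
.ve
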
4927     Level: developer
4928 
4929 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4930 
4931 @*/
4932 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4933 {
4934   PetscErrorCode ierr;
4935   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4936   Mat_SeqAIJ     *mat,*a,*b;
4937   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4938   MatScalar      *aa,*ba,*cam;
4939   PetscScalar    *ca;
4940   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4941   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4942   PetscBool      match;
4943   MPI_Comm       comm;
4944   PetscMPIInt    size;
4945 
4946   PetscFunctionBegin;
4947   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4948   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4949   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4950   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4951   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4952 
4953   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4954   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4955   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4956   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4957   aa = a->a; ba = b->a;
4958   if (scall == MAT_INITIAL_MATRIX) {
4959     if (size == 1) {
4960       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4961       PetscFunctionReturn(0);
4962     }
4963 
4964     ierr  = PetscMalloc1((1+am),&ci);CHKERRQ(ierr);
4965     ci[0] = 0;
4966     for (i=0; i<am; i++) {
4967       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4968     }
4969     ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr);
4970     ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr);
4971     k    = 0;
4972     for (i=0; i<am; i++) {
4973       ncols_o = bi[i+1] - bi[i];
4974       ncols_d = ai[i+1] - ai[i];
4975       /* off-diagonal portion of A */
4976       for (jo=0; jo<ncols_o; jo++) {
4977         col = cmap[*bj];
4978         if (col >= cstart) break;
4979         cj[k]   = col; bj++;
4980         ca[k++] = *ba++;
4981       }
4982       /* diagonal portion of A */
4983       for (j=0; j<ncols_d; j++) {
4984         cj[k]   = cstart + *aj++;
4985         ca[k++] = *aa++;
4986       }
4987       /* off-diagonal portion of A */
4988       for (j=jo; j<ncols_o; j++) {
4989         cj[k]   = cmap[*bj++];
4990         ca[k++] = *ba++;
4991       }
4992     }
4993     /* put together the new matrix */
4994     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4995     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4996     /* Since these are PETSc arrays, change flags to free them as necessary. */
4997     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4998     mat->free_a  = PETSC_TRUE;
4999     mat->free_ij = PETSC_TRUE;
5000     mat->nonew   = 0;
5001   } else if (scall == MAT_REUSE_MATRIX) {
5002     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5003     ci = mat->i; cj = mat->j; cam = mat->a;
5004     for (i=0; i<am; i++) {
5005       /* off-diagonal portion of A */
5006       ncols_o = bi[i+1] - bi[i];
5007       for (jo=0; jo<ncols_o; jo++) {
5008         col = cmap[*bj];
5009         if (col >= cstart) break;
5010         *cam++ = *ba++; bj++;
5011       }
5012       /* diagonal portion of A */
5013       ncols_d = ai[i+1] - ai[i];
5014       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5015       /* off-diagonal portion of A */
5016       for (j=jo; j<ncols_o; j++) {
5017         *cam++ = *ba++; bj++;
5018       }
5019     }
5020   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5021   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5022   PetscFunctionReturn(0);
5023 }
5024 
5025 #undef __FUNCT__
5026 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5027 /*@C
5028      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5029 
5030     Not Collective
5031 
5032    Input Parameters:
5033 +    A - the matrix
5034 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5035 -    row, col - index sets of rows and columns to extract (or NULL)
5036 
5037    Output Parameter:
5038 .    A_loc - the local sequential matrix generated
5039 
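       A minimal sketch, passing NULL to take all local rows and all nonzero
    columns (error checking omitted):

.vb
      Mat Aloc;
      MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&Aloc);
.ve
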
5040     Level: developer
5041 
5042 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5043 
5044 @*/
5045 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5046 {
5047   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5048   PetscErrorCode ierr;
5049   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5050   IS             isrowa,iscola;
5051   Mat            *aloc;
5052   PetscBool      match;
5053 
5054   PetscFunctionBegin;
5055   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5056   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5057   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5058   if (!row) {
5059     start = A->rmap->rstart; end = A->rmap->rend;
5060     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5061   } else {
5062     isrowa = *row;
5063   }
5064   if (!col) {
5065     start = A->cmap->rstart;
5066     cmap  = a->garray;
5067     nzA   = a->A->cmap->n;
5068     nzB   = a->B->cmap->n;
5069     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5070     ncols = 0;
5071     for (i=0; i<nzB; i++) {
5072       if (cmap[i] < start) idx[ncols++] = cmap[i];
5073       else break;
5074     }
5075     imark = i;
5076     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5077     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5078     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5079   } else {
5080     iscola = *col;
5081   }
5082   if (scall != MAT_INITIAL_MATRIX) {
5083     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5084     aloc[0] = *A_loc;
5085   }
5086   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5087   *A_loc = aloc[0];
5088   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5089   if (!row) {
5090     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5091   }
5092   if (!col) {
5093     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5094   }
5095   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5096   PetscFunctionReturn(0);
5097 }
5098 
5099 #undef __FUNCT__
5100 #define __FUNCT__ "MatGetBrowsOfAcols"
5101 /*@C
5102     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5103 
5104     Collective on Mat
5105 
5106    Input Parameters:
5107 +    A,B - the matrices in mpiaij format
5108 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5109 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5110 
5111    Output Parameter:
5112 +    rowb, colb - index sets of rows and columns of B to extract
5113 -    B_seq - the sequential matrix generated
5114 
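       A minimal sketch of the create/reuse pattern (the index sets returned by the
    first call are passed back in on reuse; error checking omitted):

.vb
      IS  rowb,colb;
      Mat Bseq;
      MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);
      /* ... B's numerical values change, same nonzero pattern ... */
      MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&Bseq);
.ve
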
5115     Level: developer
5116 
5117 @*/
5118 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5119 {
5120   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5121   PetscErrorCode ierr;
5122   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5123   IS             isrowb,iscolb;
5124   Mat            *bseq=NULL;
5125 
5126   PetscFunctionBegin;
5127   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5128     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5129   }
5130   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5131 
5132   if (scall == MAT_INITIAL_MATRIX) {
5133     start = A->cmap->rstart;
5134     cmap  = a->garray;
5135     nzA   = a->A->cmap->n;
5136     nzB   = a->B->cmap->n;
5137     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5138     ncols = 0;
5139     for (i=0; i<nzB; i++) {  /* B rows with global index below the local row range */
5140       if (cmap[i] < start) idx[ncols++] = cmap[i];
5141       else break;
5142     }
5143     imark = i;
5144     for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* local rows of B */
5145     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];  /* B rows with global index above the local row range */
5146     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5147     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5148   } else {
5149     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5150     isrowb  = *rowb; iscolb = *colb;
5151     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5152     bseq[0] = *B_seq;
5153   }
5154   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5155   *B_seq = bseq[0];
5156   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5157   if (!rowb) {
5158     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5159   } else {
5160     *rowb = isrowb;
5161   }
5162   if (!colb) {
5163     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5164   } else {
5165     *colb = iscolb;
5166   }
5167   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5168   PetscFunctionReturn(0);
5169 }
5170 
5171 #undef __FUNCT__
5172 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5173 /*
5174     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5175     of the OFF-DIAGONAL portion of the local part of A
5176 
5177     Collective on Mat
5178 
5179    Input Parameters:
5180 +    A,B - the matrices in mpiaij format
5181 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5182 
5183    Output Parameters:
5184 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5185 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5186 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5187 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5188 
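   Notes: the exchange reuses the communication pattern of the matrix-vector
   scatter a->Mvctx and proceeds in three phases, sending the row lengths
   (i-array), then the column indices (j-array), then the numerical values
   (a-array); with MAT_REUSE_MATRIX the i- and j-phases are skipped and only
   the a-array is exchanged again.
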
5189     Level: developer
5190 
5191 */
5192 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5193 {
5194   VecScatter_MPI_General *gen_to,*gen_from;
5195   PetscErrorCode         ierr;
5196   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5197   Mat_SeqAIJ             *b_oth;
5198   VecScatter             ctx =a->Mvctx;
5199   MPI_Comm               comm;
5200   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5201   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5202   PetscScalar            *rvalues,*svalues;
5203   MatScalar              *b_otha,*bufa,*bufA;
5204   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5205   MPI_Request            *rwaits = NULL,*swaits = NULL;
5206   MPI_Status             *sstatus,rstatus;
5207   PetscMPIInt            jj,size;
5208   PetscInt               *cols,sbs,rbs;
5209   PetscScalar            *vals;
5210 
5211   PetscFunctionBegin;
5212   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5213   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5214   if (size == 1) PetscFunctionReturn(0);
5215 
5216   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5217     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5218   }
5219   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5220   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5221 
5222   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5223   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5224   rvalues  = gen_from->values; /* holds the length of receiving row */
5225   svalues  = gen_to->values;   /* holds the length of sending row */
5226   nrecvs   = gen_from->n;
5227   nsends   = gen_to->n;
5228 
5229   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5230   srow    = gen_to->indices;    /* local row index to be sent */
5231   sstarts = gen_to->starts;
5232   sprocs  = gen_to->procs;
5233   sstatus = gen_to->sstatus;
5234   sbs     = gen_to->bs;
5235   rstarts = gen_from->starts;
5236   rprocs  = gen_from->procs;
5237   rbs     = gen_from->bs;
5238 
5239   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5240   if (scall == MAT_INITIAL_MATRIX) {
5241     /* i-array */
5242     /*---------*/
5243     /*  post receives */
5244     for (i=0; i<nrecvs; i++) {
5245       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5246       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5247       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5248     }
5249 
5250     /* pack the outgoing message */
5251     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5252 
5253     sstartsj[0] = 0;
5254     rstartsj[0] = 0;
5255     len         = 0; /* total length of j or a array to be sent */
5256     k           = 0;
5257     for (i=0; i<nsends; i++) {
5258       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5259       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5260       for (j=0; j<nrows; j++) {
5261         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5262         for (l=0; l<sbs; l++) {
5263           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5264 
5265           rowlen[j*sbs+l] = ncols;
5266 
5267           len += ncols;
5268           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5269         }
5270         k++;
5271       }
5272       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5273 
5274       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5275     }
5276     /* recvs and sends of i-array are completed */
5277     i = nrecvs;
5278     while (i--) {
5279       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5280     }
5281     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5282 
5283     /* allocate buffers for sending j and a arrays */
5284     ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
5285     ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);
5286 
5287     /* create i-array of B_oth */
5288     ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);
5289 
5290     b_othi[0] = 0;
5291     len       = 0; /* total length of j or a array to be received */
5292     k         = 0;
5293     for (i=0; i<nrecvs; i++) {
5294       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5295       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5296       for (j=0; j<nrows; j++) {
5297         b_othi[k+1] = b_othi[k] + rowlen[j];
5298         len        += rowlen[j]; k++;
5299       }
5300       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5301     }
5302 
5303     /* allocate space for j and a arrays of B_oth */
5304     ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
5305     ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);
5306 
5307     /* j-array */
5308     /*---------*/
5309     /*  post receives of j-array */
5310     for (i=0; i<nrecvs; i++) {
5311       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5312       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5313     }
5314 
5315     /* pack the outgoing message j-array */
5316     k = 0;
5317     for (i=0; i<nsends; i++) {
5318       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5319       bufJ  = bufj+sstartsj[i];
5320       for (j=0; j<nrows; j++) {
5321         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5322         for (ll=0; ll<sbs; ll++) {
5323           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5324           for (l=0; l<ncols; l++) {
5325             *bufJ++ = cols[l];
5326           }
5327           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5328         }
5329       }
5330       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5331     }
5332 
5333     /* recvs and sends of j-array are completed */
5334     i = nrecvs;
5335     while (i--) {
5336       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5337     }
5338     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5339   } else if (scall == MAT_REUSE_MATRIX) {
5340     sstartsj = *startsj_s;
5341     rstartsj = *startsj_r;
5342     bufa     = *bufa_ptr;
5343     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5344     b_otha   = b_oth->a;
5345   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Invalid MatReuse value: must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5346 
5347   /* a-array */
5348   /*---------*/
5349   /*  post receives of a-array */
5350   for (i=0; i<nrecvs; i++) {
5351     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5352     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5353   }
5354 
5355   /* pack the outgoing message a-array */
5356   k = 0;
5357   for (i=0; i<nsends; i++) {
5358     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5359     bufA  = bufa+sstartsj[i];
5360     for (j=0; j<nrows; j++) {
5361       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5362       for (ll=0; ll<sbs; ll++) {
5363         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5364         for (l=0; l<ncols; l++) {
5365           *bufA++ = vals[l];
5366         }
5367         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5368       }
5369     }
5370     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5371   }
5372   /* recvs and sends of a-array are completed */
5373   i = nrecvs;
5374   while (i--) {
5375     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5376   }
5377   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5378   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5379 
5380   if (scall == MAT_INITIAL_MATRIX) {
5381     /* put together the new matrix */
5382     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5383 
5384     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5385     /* Since these are PETSc arrays, change flags to free them as necessary. */
5386     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5387     b_oth->free_a  = PETSC_TRUE;
5388     b_oth->free_ij = PETSC_TRUE;
5389     b_oth->nonew   = 0;
5390 
5391     ierr = PetscFree(bufj);CHKERRQ(ierr);
5392     if (!startsj_s || !bufa_ptr) {
5393       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5394       ierr = PetscFree(bufa);CHKERRQ(ierr); /* bufa is not returned to the caller, so it must be freed here */
5395     } else {
5396       *startsj_s = sstartsj;
5397       *startsj_r = rstartsj;
5398       *bufa_ptr  = bufa;
5399     }
5400   }
5401   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5402   PetscFunctionReturn(0);
5403 }
5404 
5405 #undef __FUNCT__
5406 #define __FUNCT__ "MatGetCommunicationStructs"
5407 /*@C
5408   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5409 
5410   Not Collective
5411 
5412   Input Parameter:
5413 . A - The matrix in mpiaij format
5414 
5415   Output Parameters:
5416 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5417 . colmap - A map from global column index to local index into lvec
5418 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5419 
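  Example Usage:
  A minimal sketch (the colmap type depends on whether PETSc was configured with PETSC_USE_CTABLE); the returned objects are owned by the matrix and must not be destroyed by the caller:
.vb
  Vec        lvec;
  VecScatter scat;
#if defined(PETSC_USE_CTABLE)
  PetscTable colmap;
#else
  PetscInt   *colmap;
#endif
  ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&scat);CHKERRQ(ierr);
.ve
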
5420   Level: developer
5421 
5422 @*/
5423 #if defined(PETSC_USE_CTABLE)
5424 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5425 #else
5426 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5427 #endif
5428 {
5429   Mat_MPIAIJ *a;
5430 
5431   PetscFunctionBegin;
5432   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5433   PetscValidPointer(lvec, 2);
5434   PetscValidPointer(colmap, 3);
5435   PetscValidPointer(multScatter, 4);
5436   a = (Mat_MPIAIJ*) A->data;
5437   if (lvec) *lvec = a->lvec;
5438   if (colmap) *colmap = a->colmap;
5439   if (multScatter) *multScatter = a->Mvctx;
5440   PetscFunctionReturn(0);
5441 }
5442 
5443 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5444 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5445 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5446 
5447 #undef __FUNCT__
5448 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5449 /*
5450     Computes (B'*A')' since computing B*A directly is untenable
5451 
5452                n                       p                          p
5453         (              )       (              )         (                  )
5454       m (      A       )  *  n (       B      )   =   m (         C        )
5455         (              )       (              )         (                  )
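
    i.e. C = (B'*A')': both operands are explicitly transposed, the product is
    formed in the transposed order, and the result is transposed back into C.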
5456 
5457 */
5458 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5459 {
5460   PetscErrorCode ierr;
5461   Mat            At,Bt,Ct;
5462 
5463   PetscFunctionBegin;
5464   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5465   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5466   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5467   ierr = MatDestroy(&At);CHKERRQ(ierr);
5468   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5469   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5470   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5471   PetscFunctionReturn(0);
5472 }
5473 
5474 #undef __FUNCT__
5475 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5476 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5477 {
5478   PetscErrorCode ierr;
5479   PetscInt       m=A->rmap->n,n=B->cmap->n;
5480   Mat            Cmat;
5481 
5482   PetscFunctionBegin;
5483   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5484   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5485   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5486   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5487   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5488   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5489   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5490   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5491 
5492   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5493 
5494   *C = Cmat;
5495   PetscFunctionReturn(0);
5496 }
5497 
5498 /* ----------------------------------------------------------------*/
5499 #undef __FUNCT__
5500 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5501 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5502 {
5503   PetscErrorCode ierr;
5504 
5505   PetscFunctionBegin;
5506   if (scall == MAT_INITIAL_MATRIX) {
5507     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5508     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5509     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5510   }
5511   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5512   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5513   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5514   PetscFunctionReturn(0);
5515 }
5516 
5517 #if defined(PETSC_HAVE_MUMPS)
5518 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5519 #endif
5520 #if defined(PETSC_HAVE_PASTIX)
5521 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5522 #endif
5523 #if defined(PETSC_HAVE_SUPERLU_DIST)
5524 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5525 #endif
5526 #if defined(PETSC_HAVE_CLIQUE)
5527 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5528 #endif
5529 
5530 /*MC
5531    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5532 
5533    Options Database Keys:
5534 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5535 
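   Example Usage:
   A typical creation sequence (a sketch; the sizes and the preallocation counts dnz/onz are application-dependent placeholders):
.vb
   Mat A;
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
   ierr = MatMPIAIJSetPreallocation(A,dnz,NULL,onz,NULL);CHKERRQ(ierr);
.ve
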
5536   Level: beginner
5537 
5538 .seealso: MatCreateAIJ()
5539 M*/
5540 
5541 #undef __FUNCT__
5542 #define __FUNCT__ "MatCreate_MPIAIJ"
5543 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5544 {
5545   Mat_MPIAIJ     *b;
5546   PetscErrorCode ierr;
5547   PetscMPIInt    size;
5548 
5549   PetscFunctionBegin;
5550   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5551 
5552   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5553   B->data       = (void*)b;
5554   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5555   B->assembled  = PETSC_FALSE;
5556   B->insertmode = NOT_SET_VALUES;
5557   b->size       = size;
5558 
5559   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5560 
5561   /* build cache for off array entries formed */
5562   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5563 
5564   b->donotstash  = PETSC_FALSE;
5565   b->colmap      = 0;
5566   b->garray      = 0;
5567   b->roworiented = PETSC_TRUE;
5568 
5569   /* stuff used for matrix vector multiply */
5570   b->lvec  = NULL;
5571   b->Mvctx = NULL;
5572 
5573   /* stuff for MatGetRow() */
5574   b->rowindices   = 0;
5575   b->rowvalues    = 0;
5576   b->getrowactive = PETSC_FALSE;
5577 
5578   /* flexible pointer used in CUSP/CUSPARSE classes */
5579   b->spptr = NULL;
5580 
5581 #if defined(PETSC_HAVE_MUMPS)
5582   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5583 #endif
5584 #if defined(PETSC_HAVE_PASTIX)
5585   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5586 #endif
5587 #if defined(PETSC_HAVE_SUPERLU_DIST)
5588   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5589 #endif
5590 #if defined(PETSC_HAVE_CLIQUE)
5591   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5592 #endif
5593   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5594   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5595   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5596   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5597   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5598   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5599   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5600   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5601   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5602   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5603   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5604   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5605   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5606   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5607   PetscFunctionReturn(0);
5608 }
5609 
5610 #undef __FUNCT__
5611 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5612 /*@C
5613      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5614          and "off-diagonal" part of the matrix in CSR format.
5615 
5616    Collective on MPI_Comm
5617 
5618    Input Parameters:
5619 +  comm - MPI communicator
5620 .  m - number of local rows (Cannot be PETSC_DECIDE)
5621 .  n - number of local columns; this should be the same as the local size used in creating the
5622        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5623        calculated if N is given). For square matrices n is almost always m.
5624 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5625 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5626 .   i - row indices for "diagonal" portion of matrix
5627 .   j - column indices (local to the "diagonal" block)
5628 .   a - matrix values
5629 .   oi - row indices for "off-diagonal" portion of matrix
5630 .   oj - column indices (global)
5631 -   oa - matrix values
5632 
5633    Output Parameter:
5634 .   mat - the matrix
5635 
5636    Level: advanced
5637 
5638    Notes:
5639        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5640        must free the arrays once the matrix has been destroyed and not before.
5641 
5642        The i, j, oi, and oj indices are 0 based
5643 
5644        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5645 
5646        This sets local rows and cannot be used to set off-processor values.
5647 
5648        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5649        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5650        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5651        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5652        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5653        communication if it is known that only local entries will be set.
5654 
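   Example Usage:
   A deliberately tiny sketch: one local row, a 1x1 "diagonal" block, and an empty "off-diagonal" part (the arrays must outlive the matrix):
.vb
   PetscInt    i[]  = {0,1}, j[]  = {0};
   PetscScalar a[]  = {2.0};
   PetscInt    oi[] = {0,0}, oj[] = {0};  /* the row is empty, so oj/oa are never read */
   PetscScalar oa[] = {0.0};
   Mat         A;
   ierr = MatCreateMPIAIJWithSplitArrays(comm,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve
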
5655 .keywords: matrix, aij, compressed row, sparse, parallel
5656 
5657 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5658           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5659 @*/
5660 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5661 {
5662   PetscErrorCode ierr;
5663   Mat_MPIAIJ     *maij;
5664 
5665   PetscFunctionBegin;
5666   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5667   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5668   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5669   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5670   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5671   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5672   maij = (Mat_MPIAIJ*) (*mat)->data;
5673 
5674   (*mat)->preallocated = PETSC_TRUE;
5675 
5676   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5677   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5678 
5679   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5680   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5681 
5682   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5683   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5684   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5685   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5686 
5687   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5688   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5689   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5690   PetscFunctionReturn(0);
5691 }
5692 
5693 /*
5694     Special version for direct calls from Fortran
5695 */
5696 #include <petsc-private/fortranimpl.h>
5697 
5698 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5699 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5700 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5701 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5702 #endif
5703 
5704 /* Redefine these macros so they can be used in a void function */
5705 #undef CHKERRQ
5706 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5707 #undef SETERRQ2
5708 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5709 #undef SETERRQ3
5710 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5711 #undef SETERRQ
5712 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5713 
5714 #undef __FUNCT__
5715 #define __FUNCT__ "matsetvaluesmpiaij_"
5716 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5717 {
5718   Mat            mat  = *mmat;
5719   PetscInt       m    = *mm, n = *mn;
5720   InsertMode     addv = *maddv;
5721   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5722   PetscScalar    value;
5723   PetscErrorCode ierr;
5724 
5725   MatCheckPreallocated(mat,1);
5726   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5727 
5728 #if defined(PETSC_USE_DEBUG)
5729   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5730 #endif
5731   {
5732     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5733     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5734     PetscBool roworiented = aij->roworiented;
5735 
5736     /* Some Variables required in the macro */
5737     Mat        A                 = aij->A;
5738     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5739     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5740     MatScalar  *aa               = a->a;
5741     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5742     Mat        B                 = aij->B;
5743     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5744     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5745     MatScalar  *ba               = b->a;
5746 
5747     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5748     PetscInt  nonew = a->nonew;
5749     MatScalar *ap1,*ap2;
5750 
5751     PetscFunctionBegin;
5752     for (i=0; i<m; i++) {
5753       if (im[i] < 0) continue;
5754 #if defined(PETSC_USE_DEBUG)
5755       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5756 #endif
5757       if (im[i] >= rstart && im[i] < rend) {
5758         row      = im[i] - rstart;
5759         lastcol1 = -1;
5760         rp1      = aj + ai[row];
5761         ap1      = aa + ai[row];
5762         rmax1    = aimax[row];
5763         nrow1    = ailen[row];
5764         low1     = 0;
5765         high1    = nrow1;
5766         lastcol2 = -1;
5767         rp2      = bj + bi[row];
5768         ap2      = ba + bi[row];
5769         rmax2    = bimax[row];
5770         nrow2    = bilen[row];
5771         low2     = 0;
5772         high2    = nrow2;
5773 
5774         for (j=0; j<n; j++) {
5775           if (roworiented) value = v[i*n+j];
5776           else value = v[i+j*m];
5777           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5778           if (in[j] >= cstart && in[j] < cend) {
5779             col = in[j] - cstart;
5780             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5781           } else if (in[j] < 0) continue;
5782 #if defined(PETSC_USE_DEBUG)
5783           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5784 #endif
5785           else {
5786             if (mat->was_assembled) {
5787               if (!aij->colmap) {
5788                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5789               }
5790 #if defined(PETSC_USE_CTABLE)
5791               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5792               col--;
5793 #else
5794               col = aij->colmap[in[j]] - 1;
5795 #endif
5796               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5797                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5798                 col  =  in[j];
5799                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5800                 B     = aij->B;
5801                 b     = (Mat_SeqAIJ*)B->data;
5802                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5803                 rp2   = bj + bi[row];
5804                 ap2   = ba + bi[row];
5805                 rmax2 = bimax[row];
5806                 nrow2 = bilen[row];
5807                 low2  = 0;
5808                 high2 = nrow2;
5809                 bm    = aij->B->rmap->n;
5810                 ba    = b->a;
5811               }
5812             } else col = in[j];
5813             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5814           }
5815         }
5816       } else if (!aij->donotstash) {
5817         if (roworiented) {
5818           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5819         } else {
5820           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5821         }
5822       }
5823     }
5824   }
5825   PetscFunctionReturnVoid();
5826 }
5827 
5828